add Eigen as a dependency
This commit is contained in:
673
external/include/eigen3/Eigen/src/Cholesky/LDLT.h
vendored
Normal file
673
external/include/eigen3/Eigen/src/Cholesky/LDLT.h
vendored
Normal file
@@ -0,0 +1,673 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2009 Keir Mierle <mierle@gmail.com>
|
||||
// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2011 Timothy E. Holy <tim.holy@gmail.com >
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_LDLT_H
|
||||
#define EIGEN_LDLT_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
template<typename MatrixType, int UpLo> struct LDLT_Traits;
|
||||
|
||||
// PositiveSemiDef means positive semi-definite and non-zero; same for NegativeSemiDef
|
||||
enum SignMatrix { PositiveSemiDef, NegativeSemiDef, ZeroSign, Indefinite };
|
||||
}
|
||||
|
||||
/** \ingroup Cholesky_Module
|
||||
*
|
||||
* \class LDLT
|
||||
*
|
||||
* \brief Robust Cholesky decomposition of a matrix with pivoting
|
||||
*
|
||||
* \tparam _MatrixType the type of the matrix of which to compute the LDL^T Cholesky decomposition
|
||||
* \tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
|
||||
* The other triangular part won't be read.
|
||||
*
|
||||
* Perform a robust Cholesky decomposition of a positive semidefinite or negative semidefinite
|
||||
* matrix \f$ A \f$ such that \f$ A = P^TLDL^*P \f$, where P is a permutation matrix, L
|
||||
* is lower triangular with a unit diagonal and D is a diagonal matrix.
|
||||
*
|
||||
* The decomposition uses pivoting to ensure stability, so that L will have
|
||||
* zeros in the bottom right rank(A) - n submatrix. Avoiding the square root
|
||||
* on D also stabilizes the computation.
|
||||
*
|
||||
* Remember that Cholesky decompositions are not rank-revealing. Also, do not use a Cholesky
|
||||
* decomposition to determine whether a system of equations has a solution.
|
||||
*
|
||||
* This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
|
||||
*
|
||||
* \sa MatrixBase::ldlt(), SelfAdjointView::ldlt(), class LLT
|
||||
*/
|
||||
template<typename _MatrixType, int _UpLo> class LDLT
|
||||
{
|
||||
public:
|
||||
typedef _MatrixType MatrixType;
|
||||
enum {
|
||||
RowsAtCompileTime = MatrixType::RowsAtCompileTime,
|
||||
ColsAtCompileTime = MatrixType::ColsAtCompileTime,
|
||||
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
|
||||
UpLo = _UpLo
|
||||
};
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
|
||||
typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
|
||||
typedef typename MatrixType::StorageIndex StorageIndex;
|
||||
typedef Matrix<Scalar, RowsAtCompileTime, 1, 0, MaxRowsAtCompileTime, 1> TmpMatrixType;
|
||||
|
||||
typedef Transpositions<RowsAtCompileTime, MaxRowsAtCompileTime> TranspositionType;
|
||||
typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationType;
|
||||
|
||||
typedef internal::LDLT_Traits<MatrixType,UpLo> Traits;
|
||||
|
||||
/** \brief Default Constructor.
|
||||
*
|
||||
* The default constructor is useful in cases in which the user intends to
|
||||
* perform decompositions via LDLT::compute(const MatrixType&).
|
||||
*/
|
||||
LDLT()
|
||||
: m_matrix(),
|
||||
m_transpositions(),
|
||||
m_sign(internal::ZeroSign),
|
||||
m_isInitialized(false)
|
||||
{}
|
||||
|
||||
/** \brief Default Constructor with memory preallocation
|
||||
*
|
||||
* Like the default constructor but with preallocation of the internal data
|
||||
* according to the specified problem \a size.
|
||||
* \sa LDLT()
|
||||
*/
|
||||
explicit LDLT(Index size)
|
||||
: m_matrix(size, size),
|
||||
m_transpositions(size),
|
||||
m_temporary(size),
|
||||
m_sign(internal::ZeroSign),
|
||||
m_isInitialized(false)
|
||||
{}
|
||||
|
||||
/** \brief Constructor with decomposition
|
||||
*
|
||||
* This calculates the decomposition for the input \a matrix.
|
||||
*
|
||||
* \sa LDLT(Index size)
|
||||
*/
|
||||
template<typename InputType>
|
||||
explicit LDLT(const EigenBase<InputType>& matrix)
|
||||
: m_matrix(matrix.rows(), matrix.cols()),
|
||||
m_transpositions(matrix.rows()),
|
||||
m_temporary(matrix.rows()),
|
||||
m_sign(internal::ZeroSign),
|
||||
m_isInitialized(false)
|
||||
{
|
||||
compute(matrix.derived());
|
||||
}
|
||||
|
||||
/** \brief Constructs a LDLT factorization from a given matrix
|
||||
*
|
||||
* This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when \c MatrixType is a Eigen::Ref.
|
||||
*
|
||||
* \sa LDLT(const EigenBase&)
|
||||
*/
|
||||
template<typename InputType>
|
||||
explicit LDLT(EigenBase<InputType>& matrix)
|
||||
: m_matrix(matrix.derived()),
|
||||
m_transpositions(matrix.rows()),
|
||||
m_temporary(matrix.rows()),
|
||||
m_sign(internal::ZeroSign),
|
||||
m_isInitialized(false)
|
||||
{
|
||||
compute(matrix.derived());
|
||||
}
|
||||
|
||||
/** Clear any existing decomposition
|
||||
* \sa rankUpdate(w,sigma)
|
||||
*/
|
||||
void setZero()
|
||||
{
|
||||
m_isInitialized = false;
|
||||
}
|
||||
|
||||
/** \returns a view of the upper triangular matrix U */
|
||||
inline typename Traits::MatrixU matrixU() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LDLT is not initialized.");
|
||||
return Traits::getU(m_matrix);
|
||||
}
|
||||
|
||||
/** \returns a view of the lower triangular matrix L */
|
||||
inline typename Traits::MatrixL matrixL() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LDLT is not initialized.");
|
||||
return Traits::getL(m_matrix);
|
||||
}
|
||||
|
||||
/** \returns the permutation matrix P as a transposition sequence.
|
||||
*/
|
||||
inline const TranspositionType& transpositionsP() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LDLT is not initialized.");
|
||||
return m_transpositions;
|
||||
}
|
||||
|
||||
/** \returns the coefficients of the diagonal matrix D */
|
||||
inline Diagonal<const MatrixType> vectorD() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LDLT is not initialized.");
|
||||
return m_matrix.diagonal();
|
||||
}
|
||||
|
||||
/** \returns true if the matrix is positive (semidefinite) */
|
||||
inline bool isPositive() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LDLT is not initialized.");
|
||||
return m_sign == internal::PositiveSemiDef || m_sign == internal::ZeroSign;
|
||||
}
|
||||
|
||||
/** \returns true if the matrix is negative (semidefinite) */
|
||||
inline bool isNegative(void) const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LDLT is not initialized.");
|
||||
return m_sign == internal::NegativeSemiDef || m_sign == internal::ZeroSign;
|
||||
}
|
||||
|
||||
/** \returns a solution x of \f$ A x = b \f$ using the current decomposition of A.
|
||||
*
|
||||
* This function also supports in-place solves using the syntax <tt>x = decompositionObject.solve(x)</tt> .
|
||||
*
|
||||
* \note_about_checking_solutions
|
||||
*
|
||||
* More precisely, this method solves \f$ A x = b \f$ using the decomposition \f$ A = P^T L D L^* P \f$
|
||||
* by solving the systems \f$ P^T y_1 = b \f$, \f$ L y_2 = y_1 \f$, \f$ D y_3 = y_2 \f$,
|
||||
* \f$ L^* y_4 = y_3 \f$ and \f$ P x = y_4 \f$ in succession. If the matrix \f$ A \f$ is singular, then
|
||||
* \f$ D \f$ will also be singular (all the other matrices are invertible). In that case, the
|
||||
* least-square solution of \f$ D y_3 = y_2 \f$ is computed. This does not mean that this function
|
||||
* computes the least-square solution of \f$ A x = b \f$ is \f$ A \f$ is singular.
|
||||
*
|
||||
* \sa MatrixBase::ldlt(), SelfAdjointView::ldlt()
|
||||
*/
|
||||
template<typename Rhs>
|
||||
inline const Solve<LDLT, Rhs>
|
||||
solve(const MatrixBase<Rhs>& b) const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LDLT is not initialized.");
|
||||
eigen_assert(m_matrix.rows()==b.rows()
|
||||
&& "LDLT::solve(): invalid number of rows of the right hand side matrix b");
|
||||
return Solve<LDLT, Rhs>(*this, b.derived());
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
bool solveInPlace(MatrixBase<Derived> &bAndX) const;
|
||||
|
||||
template<typename InputType>
|
||||
LDLT& compute(const EigenBase<InputType>& matrix);
|
||||
|
||||
/** \returns an estimate of the reciprocal condition number of the matrix of
|
||||
* which \c *this is the LDLT decomposition.
|
||||
*/
|
||||
RealScalar rcond() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LDLT is not initialized.");
|
||||
return internal::rcond_estimate_helper(m_l1_norm, *this);
|
||||
}
|
||||
|
||||
template <typename Derived>
|
||||
LDLT& rankUpdate(const MatrixBase<Derived>& w, const RealScalar& alpha=1);
|
||||
|
||||
/** \returns the internal LDLT decomposition matrix
|
||||
*
|
||||
* TODO: document the storage layout
|
||||
*/
|
||||
inline const MatrixType& matrixLDLT() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LDLT is not initialized.");
|
||||
return m_matrix;
|
||||
}
|
||||
|
||||
MatrixType reconstructedMatrix() const;
|
||||
|
||||
/** \returns the adjoint of \c *this, that is, a const reference to the decomposition itself as the underlying matrix is self-adjoint.
|
||||
*
|
||||
* This method is provided for compatibility with other matrix decompositions, thus enabling generic code such as:
|
||||
* \code x = decomposition.adjoint().solve(b) \endcode
|
||||
*/
|
||||
const LDLT& adjoint() const { return *this; };
|
||||
|
||||
inline Index rows() const { return m_matrix.rows(); }
|
||||
inline Index cols() const { return m_matrix.cols(); }
|
||||
|
||||
/** \brief Reports whether previous computation was successful.
|
||||
*
|
||||
* \returns \c Success if computation was succesful,
|
||||
* \c NumericalIssue if the factorization failed because of a zero pivot.
|
||||
*/
|
||||
ComputationInfo info() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LDLT is not initialized.");
|
||||
return m_info;
|
||||
}
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename RhsType, typename DstType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void _solve_impl(const RhsType &rhs, DstType &dst) const;
|
||||
#endif
|
||||
|
||||
protected:
|
||||
|
||||
static void check_template_parameters()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
|
||||
}
|
||||
|
||||
/** \internal
|
||||
* Used to compute and store the Cholesky decomposition A = L D L^* = U^* D U.
|
||||
* The strict upper part is used during the decomposition, the strict lower
|
||||
* part correspond to the coefficients of L (its diagonal is equal to 1 and
|
||||
* is not stored), and the diagonal entries correspond to D.
|
||||
*/
|
||||
MatrixType m_matrix;
|
||||
RealScalar m_l1_norm;
|
||||
TranspositionType m_transpositions;
|
||||
TmpMatrixType m_temporary;
|
||||
internal::SignMatrix m_sign;
|
||||
bool m_isInitialized;
|
||||
ComputationInfo m_info;
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<int UpLo> struct ldlt_inplace;
|
||||
|
||||
template<> struct ldlt_inplace<Lower>
|
||||
{
|
||||
template<typename MatrixType, typename TranspositionType, typename Workspace>
|
||||
static bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, SignMatrix& sign)
|
||||
{
|
||||
using std::abs;
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef typename MatrixType::RealScalar RealScalar;
|
||||
typedef typename TranspositionType::StorageIndex IndexType;
|
||||
eigen_assert(mat.rows()==mat.cols());
|
||||
const Index size = mat.rows();
|
||||
bool found_zero_pivot = false;
|
||||
bool ret = true;
|
||||
|
||||
if (size <= 1)
|
||||
{
|
||||
transpositions.setIdentity();
|
||||
if(size==0) sign = ZeroSign;
|
||||
else if (numext::real(mat.coeff(0,0)) > static_cast<RealScalar>(0) ) sign = PositiveSemiDef;
|
||||
else if (numext::real(mat.coeff(0,0)) < static_cast<RealScalar>(0)) sign = NegativeSemiDef;
|
||||
else sign = ZeroSign;
|
||||
return true;
|
||||
}
|
||||
|
||||
for (Index k = 0; k < size; ++k)
|
||||
{
|
||||
// Find largest diagonal element
|
||||
Index index_of_biggest_in_corner;
|
||||
mat.diagonal().tail(size-k).cwiseAbs().maxCoeff(&index_of_biggest_in_corner);
|
||||
index_of_biggest_in_corner += k;
|
||||
|
||||
transpositions.coeffRef(k) = IndexType(index_of_biggest_in_corner);
|
||||
if(k != index_of_biggest_in_corner)
|
||||
{
|
||||
// apply the transposition while taking care to consider only
|
||||
// the lower triangular part
|
||||
Index s = size-index_of_biggest_in_corner-1; // trailing size after the biggest element
|
||||
mat.row(k).head(k).swap(mat.row(index_of_biggest_in_corner).head(k));
|
||||
mat.col(k).tail(s).swap(mat.col(index_of_biggest_in_corner).tail(s));
|
||||
std::swap(mat.coeffRef(k,k),mat.coeffRef(index_of_biggest_in_corner,index_of_biggest_in_corner));
|
||||
for(Index i=k+1;i<index_of_biggest_in_corner;++i)
|
||||
{
|
||||
Scalar tmp = mat.coeffRef(i,k);
|
||||
mat.coeffRef(i,k) = numext::conj(mat.coeffRef(index_of_biggest_in_corner,i));
|
||||
mat.coeffRef(index_of_biggest_in_corner,i) = numext::conj(tmp);
|
||||
}
|
||||
if(NumTraits<Scalar>::IsComplex)
|
||||
mat.coeffRef(index_of_biggest_in_corner,k) = numext::conj(mat.coeff(index_of_biggest_in_corner,k));
|
||||
}
|
||||
|
||||
// partition the matrix:
|
||||
// A00 | - | -
|
||||
// lu = A10 | A11 | -
|
||||
// A20 | A21 | A22
|
||||
Index rs = size - k - 1;
|
||||
Block<MatrixType,Dynamic,1> A21(mat,k+1,k,rs,1);
|
||||
Block<MatrixType,1,Dynamic> A10(mat,k,0,1,k);
|
||||
Block<MatrixType,Dynamic,Dynamic> A20(mat,k+1,0,rs,k);
|
||||
|
||||
if(k>0)
|
||||
{
|
||||
temp.head(k) = mat.diagonal().real().head(k).asDiagonal() * A10.adjoint();
|
||||
mat.coeffRef(k,k) -= (A10 * temp.head(k)).value();
|
||||
if(rs>0)
|
||||
A21.noalias() -= A20 * temp.head(k);
|
||||
}
|
||||
|
||||
// In some previous versions of Eigen (e.g., 3.2.1), the scaling was omitted if the pivot
|
||||
// was smaller than the cutoff value. However, since LDLT is not rank-revealing
|
||||
// we should only make sure that we do not introduce INF or NaN values.
|
||||
// Remark that LAPACK also uses 0 as the cutoff value.
|
||||
RealScalar realAkk = numext::real(mat.coeffRef(k,k));
|
||||
bool pivot_is_valid = (abs(realAkk) > RealScalar(0));
|
||||
|
||||
if(k==0 && !pivot_is_valid)
|
||||
{
|
||||
// The entire diagonal is zero, there is nothing more to do
|
||||
// except filling the transpositions, and checking whether the matrix is zero.
|
||||
sign = ZeroSign;
|
||||
for(Index j = 0; j<size; ++j)
|
||||
{
|
||||
transpositions.coeffRef(j) = IndexType(j);
|
||||
ret = ret && (mat.col(j).tail(size-j-1).array()==Scalar(0)).all();
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
if((rs>0) && pivot_is_valid)
|
||||
A21 /= realAkk;
|
||||
else if(rs>0)
|
||||
ret = ret && (A21.array()==Scalar(0)).all();
|
||||
|
||||
if(found_zero_pivot && pivot_is_valid) ret = false; // factorization failed
|
||||
else if(!pivot_is_valid) found_zero_pivot = true;
|
||||
|
||||
if (sign == PositiveSemiDef) {
|
||||
if (realAkk < static_cast<RealScalar>(0)) sign = Indefinite;
|
||||
} else if (sign == NegativeSemiDef) {
|
||||
if (realAkk > static_cast<RealScalar>(0)) sign = Indefinite;
|
||||
} else if (sign == ZeroSign) {
|
||||
if (realAkk > static_cast<RealScalar>(0)) sign = PositiveSemiDef;
|
||||
else if (realAkk < static_cast<RealScalar>(0)) sign = NegativeSemiDef;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Reference for the algorithm: Davis and Hager, "Multiple Rank
|
||||
// Modifications of a Sparse Cholesky Factorization" (Algorithm 1)
|
||||
// Trivial rearrangements of their computations (Timothy E. Holy)
|
||||
// allow their algorithm to work for rank-1 updates even if the
|
||||
// original matrix is not of full rank.
|
||||
// Here only rank-1 updates are implemented, to reduce the
|
||||
// requirement for intermediate storage and improve accuracy
|
||||
template<typename MatrixType, typename WDerived>
|
||||
static bool updateInPlace(MatrixType& mat, MatrixBase<WDerived>& w, const typename MatrixType::RealScalar& sigma=1)
|
||||
{
|
||||
using numext::isfinite;
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef typename MatrixType::RealScalar RealScalar;
|
||||
|
||||
const Index size = mat.rows();
|
||||
eigen_assert(mat.cols() == size && w.size()==size);
|
||||
|
||||
RealScalar alpha = 1;
|
||||
|
||||
// Apply the update
|
||||
for (Index j = 0; j < size; j++)
|
||||
{
|
||||
// Check for termination due to an original decomposition of low-rank
|
||||
if (!(isfinite)(alpha))
|
||||
break;
|
||||
|
||||
// Update the diagonal terms
|
||||
RealScalar dj = numext::real(mat.coeff(j,j));
|
||||
Scalar wj = w.coeff(j);
|
||||
RealScalar swj2 = sigma*numext::abs2(wj);
|
||||
RealScalar gamma = dj*alpha + swj2;
|
||||
|
||||
mat.coeffRef(j,j) += swj2/alpha;
|
||||
alpha += swj2/dj;
|
||||
|
||||
|
||||
// Update the terms of L
|
||||
Index rs = size-j-1;
|
||||
w.tail(rs) -= wj * mat.col(j).tail(rs);
|
||||
if(gamma != 0)
|
||||
mat.col(j).tail(rs) += (sigma*numext::conj(wj)/gamma)*w.tail(rs);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename MatrixType, typename TranspositionType, typename Workspace, typename WType>
|
||||
static bool update(MatrixType& mat, const TranspositionType& transpositions, Workspace& tmp, const WType& w, const typename MatrixType::RealScalar& sigma=1)
|
||||
{
|
||||
// Apply the permutation to the input w
|
||||
tmp = transpositions * w;
|
||||
|
||||
return ldlt_inplace<Lower>::updateInPlace(mat,tmp,sigma);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct ldlt_inplace<Upper>
|
||||
{
|
||||
template<typename MatrixType, typename TranspositionType, typename Workspace>
|
||||
static EIGEN_STRONG_INLINE bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, SignMatrix& sign)
|
||||
{
|
||||
Transpose<MatrixType> matt(mat);
|
||||
return ldlt_inplace<Lower>::unblocked(matt, transpositions, temp, sign);
|
||||
}
|
||||
|
||||
template<typename MatrixType, typename TranspositionType, typename Workspace, typename WType>
|
||||
static EIGEN_STRONG_INLINE bool update(MatrixType& mat, TranspositionType& transpositions, Workspace& tmp, WType& w, const typename MatrixType::RealScalar& sigma=1)
|
||||
{
|
||||
Transpose<MatrixType> matt(mat);
|
||||
return ldlt_inplace<Lower>::update(matt, transpositions, tmp, w.conjugate(), sigma);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename MatrixType> struct LDLT_Traits<MatrixType,Lower>
|
||||
{
|
||||
typedef const TriangularView<const MatrixType, UnitLower> MatrixL;
|
||||
typedef const TriangularView<const typename MatrixType::AdjointReturnType, UnitUpper> MatrixU;
|
||||
static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); }
|
||||
static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); }
|
||||
};
|
||||
|
||||
template<typename MatrixType> struct LDLT_Traits<MatrixType,Upper>
|
||||
{
|
||||
typedef const TriangularView<const typename MatrixType::AdjointReturnType, UnitLower> MatrixL;
|
||||
typedef const TriangularView<const MatrixType, UnitUpper> MatrixU;
|
||||
static inline MatrixL getL(const MatrixType& m) { return MatrixL(m.adjoint()); }
|
||||
static inline MatrixU getU(const MatrixType& m) { return MatrixU(m); }
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** Compute / recompute the LDLT decomposition A = L D L^* = U^* D U of \a matrix
|
||||
*/
|
||||
template<typename MatrixType, int _UpLo>
|
||||
template<typename InputType>
|
||||
LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const EigenBase<InputType>& a)
|
||||
{
|
||||
check_template_parameters();
|
||||
|
||||
eigen_assert(a.rows()==a.cols());
|
||||
const Index size = a.rows();
|
||||
|
||||
m_matrix = a.derived();
|
||||
|
||||
// Compute matrix L1 norm = max abs column sum.
|
||||
m_l1_norm = RealScalar(0);
|
||||
// TODO move this code to SelfAdjointView
|
||||
for (Index col = 0; col < size; ++col) {
|
||||
RealScalar abs_col_sum;
|
||||
if (_UpLo == Lower)
|
||||
abs_col_sum = m_matrix.col(col).tail(size - col).template lpNorm<1>() + m_matrix.row(col).head(col).template lpNorm<1>();
|
||||
else
|
||||
abs_col_sum = m_matrix.col(col).head(col).template lpNorm<1>() + m_matrix.row(col).tail(size - col).template lpNorm<1>();
|
||||
if (abs_col_sum > m_l1_norm)
|
||||
m_l1_norm = abs_col_sum;
|
||||
}
|
||||
|
||||
m_transpositions.resize(size);
|
||||
m_isInitialized = false;
|
||||
m_temporary.resize(size);
|
||||
m_sign = internal::ZeroSign;
|
||||
|
||||
m_info = internal::ldlt_inplace<UpLo>::unblocked(m_matrix, m_transpositions, m_temporary, m_sign) ? Success : NumericalIssue;
|
||||
|
||||
m_isInitialized = true;
|
||||
return *this;
|
||||
}
|
||||
|
||||
/** Update the LDLT decomposition: given A = L D L^T, efficiently compute the decomposition of A + sigma w w^T.
|
||||
* \param w a vector to be incorporated into the decomposition.
|
||||
* \param sigma a scalar, +1 for updates and -1 for "downdates," which correspond to removing previously-added column vectors. Optional; default value is +1.
|
||||
* \sa setZero()
|
||||
*/
|
||||
template<typename MatrixType, int _UpLo>
|
||||
template<typename Derived>
|
||||
LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Derived>& w, const typename LDLT<MatrixType,_UpLo>::RealScalar& sigma)
|
||||
{
|
||||
typedef typename TranspositionType::StorageIndex IndexType;
|
||||
const Index size = w.rows();
|
||||
if (m_isInitialized)
|
||||
{
|
||||
eigen_assert(m_matrix.rows()==size);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_matrix.resize(size,size);
|
||||
m_matrix.setZero();
|
||||
m_transpositions.resize(size);
|
||||
for (Index i = 0; i < size; i++)
|
||||
m_transpositions.coeffRef(i) = IndexType(i);
|
||||
m_temporary.resize(size);
|
||||
m_sign = sigma>=0 ? internal::PositiveSemiDef : internal::NegativeSemiDef;
|
||||
m_isInitialized = true;
|
||||
}
|
||||
|
||||
internal::ldlt_inplace<UpLo>::update(m_matrix, m_transpositions, m_temporary, w, sigma);
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename _MatrixType, int _UpLo>
|
||||
template<typename RhsType, typename DstType>
|
||||
void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const
|
||||
{
|
||||
eigen_assert(rhs.rows() == rows());
|
||||
// dst = P b
|
||||
dst = m_transpositions * rhs;
|
||||
|
||||
// dst = L^-1 (P b)
|
||||
matrixL().solveInPlace(dst);
|
||||
|
||||
// dst = D^-1 (L^-1 P b)
|
||||
// more precisely, use pseudo-inverse of D (see bug 241)
|
||||
using std::abs;
|
||||
const typename Diagonal<const MatrixType>::RealReturnType vecD(vectorD());
|
||||
// In some previous versions, tolerance was set to the max of 1/highest (or rather numeric_limits::min())
|
||||
// and the maximal diagonal entry * epsilon as motivated by LAPACK's xGELSS:
|
||||
// RealScalar tolerance = numext::maxi(vecD.array().abs().maxCoeff() * NumTraits<RealScalar>::epsilon(),RealScalar(1) / NumTraits<RealScalar>::highest());
|
||||
// However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest
|
||||
// diagonal element is not well justified and leads to numerical issues in some cases.
|
||||
// Moreover, Lapack's xSYTRS routines use 0 for the tolerance.
|
||||
// Using numeric_limits::min() gives us more robustness to denormals.
|
||||
RealScalar tolerance = (std::numeric_limits<RealScalar>::min)();
|
||||
|
||||
for (Index i = 0; i < vecD.size(); ++i)
|
||||
{
|
||||
if(abs(vecD(i)) > tolerance)
|
||||
dst.row(i) /= vecD(i);
|
||||
else
|
||||
dst.row(i).setZero();
|
||||
}
|
||||
|
||||
// dst = L^-T (D^-1 L^-1 P b)
|
||||
matrixU().solveInPlace(dst);
|
||||
|
||||
// dst = P^-1 (L^-T D^-1 L^-1 P b) = A^-1 b
|
||||
dst = m_transpositions.transpose() * dst;
|
||||
}
|
||||
#endif
|
||||
|
||||
/** \internal use x = ldlt_object.solve(x);
|
||||
*
|
||||
* This is the \em in-place version of solve().
|
||||
*
|
||||
* \param bAndX represents both the right-hand side matrix b and result x.
|
||||
*
|
||||
* \returns true always! If you need to check for existence of solutions, use another decomposition like LU, QR, or SVD.
|
||||
*
|
||||
* This version avoids a copy when the right hand side matrix b is not
|
||||
* needed anymore.
|
||||
*
|
||||
* \sa LDLT::solve(), MatrixBase::ldlt()
|
||||
*/
|
||||
template<typename MatrixType,int _UpLo>
|
||||
template<typename Derived>
|
||||
bool LDLT<MatrixType,_UpLo>::solveInPlace(MatrixBase<Derived> &bAndX) const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LDLT is not initialized.");
|
||||
eigen_assert(m_matrix.rows() == bAndX.rows());
|
||||
|
||||
bAndX = this->solve(bAndX);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/** \returns the matrix represented by the decomposition,
|
||||
* i.e., it returns the product: P^T L D L^* P.
|
||||
* This function is provided for debug purpose. */
|
||||
template<typename MatrixType, int _UpLo>
|
||||
MatrixType LDLT<MatrixType,_UpLo>::reconstructedMatrix() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LDLT is not initialized.");
|
||||
const Index size = m_matrix.rows();
|
||||
MatrixType res(size,size);
|
||||
|
||||
// P
|
||||
res.setIdentity();
|
||||
res = transpositionsP() * res;
|
||||
// L^* P
|
||||
res = matrixU() * res;
|
||||
// D(L^*P)
|
||||
res = vectorD().real().asDiagonal() * res;
|
||||
// L(DL^*P)
|
||||
res = matrixL() * res;
|
||||
// P^T (LDL^*P)
|
||||
res = transpositionsP().transpose() * res;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/** \cholesky_module
|
||||
* \returns the Cholesky decomposition with full pivoting without square root of \c *this
|
||||
* \sa MatrixBase::ldlt()
|
||||
*/
|
||||
template<typename MatrixType, unsigned int UpLo>
|
||||
inline const LDLT<typename SelfAdjointView<MatrixType, UpLo>::PlainObject, UpLo>
|
||||
SelfAdjointView<MatrixType, UpLo>::ldlt() const
|
||||
{
|
||||
return LDLT<PlainObject,UpLo>(m_matrix);
|
||||
}
|
||||
|
||||
/** \cholesky_module
|
||||
* \returns the Cholesky decomposition with full pivoting without square root of \c *this
|
||||
* \sa SelfAdjointView::ldlt()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const LDLT<typename MatrixBase<Derived>::PlainObject>
|
||||
MatrixBase<Derived>::ldlt() const
|
||||
{
|
||||
return LDLT<PlainObject>(derived());
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_LDLT_H
|
||||
542
external/include/eigen3/Eigen/src/Cholesky/LLT.h
vendored
Normal file
542
external/include/eigen3/Eigen/src/Cholesky/LLT.h
vendored
Normal file
@@ -0,0 +1,542 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_LLT_H
|
||||
#define EIGEN_LLT_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal{
|
||||
template<typename MatrixType, int UpLo> struct LLT_Traits;
|
||||
}
|
||||
|
||||
/** \ingroup Cholesky_Module
|
||||
*
|
||||
* \class LLT
|
||||
*
|
||||
* \brief Standard Cholesky decomposition (LL^T) of a matrix and associated features
|
||||
*
|
||||
* \tparam _MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition
|
||||
* \tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
|
||||
* The other triangular part won't be read.
|
||||
*
|
||||
* This class performs a LL^T Cholesky decomposition of a symmetric, positive definite
|
||||
* matrix A such that A = LL^* = U^*U, where L is lower triangular.
|
||||
*
|
||||
* While the Cholesky decomposition is particularly useful to solve selfadjoint problems like D^*D x = b,
|
||||
* for that purpose, we recommend the Cholesky decomposition without square root which is more stable
|
||||
* and even faster. Nevertheless, this standard Cholesky decomposition remains useful in many other
|
||||
* situations like generalised eigen problems with hermitian matrices.
|
||||
*
|
||||
* Remember that Cholesky decompositions are not rank-revealing. This LLT decomposition is only stable on positive definite matrices,
|
||||
* use LDLT instead for the semidefinite case. Also, do not use a Cholesky decomposition to determine whether a system of equations
|
||||
* has a solution.
|
||||
*
|
||||
* Example: \include LLT_example.cpp
|
||||
* Output: \verbinclude LLT_example.out
|
||||
*
|
||||
* \b Performance: for best performance, it is recommended to use a column-major storage format
|
||||
* with the Lower triangular part (the default), or, equivalently, a row-major storage format
|
||||
* with the Upper triangular part. Otherwise, you might get a 20% slowdown for the full factorization
|
||||
* step, and rank-updates can be up to 3 times slower.
|
||||
*
|
||||
* This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
|
||||
*
|
||||
* Note that during the decomposition, only the lower (or upper, as defined by _UpLo) triangular part of A is considered.
|
||||
* Therefore, the strict lower part does not have to store correct values.
|
||||
*
|
||||
* \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT
|
||||
*/
|
||||
template<typename _MatrixType, int _UpLo> class LLT
|
||||
{
|
||||
public:
|
||||
typedef _MatrixType MatrixType;
|
||||
enum {
|
||||
RowsAtCompileTime = MatrixType::RowsAtCompileTime,
|
||||
ColsAtCompileTime = MatrixType::ColsAtCompileTime,
|
||||
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
|
||||
};
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
|
||||
typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
|
||||
typedef typename MatrixType::StorageIndex StorageIndex;
|
||||
|
||||
enum {
|
||||
PacketSize = internal::packet_traits<Scalar>::size,
|
||||
AlignmentMask = int(PacketSize)-1,
|
||||
UpLo = _UpLo
|
||||
};
|
||||
|
||||
typedef internal::LLT_Traits<MatrixType,UpLo> Traits;
|
||||
|
||||
/**
|
||||
* \brief Default Constructor.
|
||||
*
|
||||
* The default constructor is useful in cases in which the user intends to
|
||||
* perform decompositions via LLT::compute(const MatrixType&).
|
||||
*/
|
||||
LLT() : m_matrix(), m_isInitialized(false) {}
|
||||
|
||||
/** \brief Default Constructor with memory preallocation
|
||||
*
|
||||
* Like the default constructor but with preallocation of the internal data
|
||||
* according to the specified problem \a size.
|
||||
* \sa LLT()
|
||||
*/
|
||||
explicit LLT(Index size) : m_matrix(size, size),
|
||||
m_isInitialized(false) {}
|
||||
|
||||
template<typename InputType>
|
||||
explicit LLT(const EigenBase<InputType>& matrix)
|
||||
: m_matrix(matrix.rows(), matrix.cols()),
|
||||
m_isInitialized(false)
|
||||
{
|
||||
compute(matrix.derived());
|
||||
}
|
||||
|
||||
/** \brief Constructs a LDLT factorization from a given matrix
|
||||
*
|
||||
* This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when
|
||||
* \c MatrixType is a Eigen::Ref.
|
||||
*
|
||||
* \sa LLT(const EigenBase&)
|
||||
*/
|
||||
template<typename InputType>
|
||||
explicit LLT(EigenBase<InputType>& matrix)
|
||||
: m_matrix(matrix.derived()),
|
||||
m_isInitialized(false)
|
||||
{
|
||||
compute(matrix.derived());
|
||||
}
|
||||
|
||||
/** \returns a view of the upper triangular matrix U */
|
||||
inline typename Traits::MatrixU matrixU() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LLT is not initialized.");
|
||||
return Traits::getU(m_matrix);
|
||||
}
|
||||
|
||||
/** \returns a view of the lower triangular matrix L */
|
||||
inline typename Traits::MatrixL matrixL() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LLT is not initialized.");
|
||||
return Traits::getL(m_matrix);
|
||||
}
|
||||
|
||||
/** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
|
||||
*
|
||||
* Since this LLT class assumes anyway that the matrix A is invertible, the solution
|
||||
* theoretically exists and is unique regardless of b.
|
||||
*
|
||||
* Example: \include LLT_solve.cpp
|
||||
* Output: \verbinclude LLT_solve.out
|
||||
*
|
||||
* \sa solveInPlace(), MatrixBase::llt(), SelfAdjointView::llt()
|
||||
*/
|
||||
template<typename Rhs>
|
||||
inline const Solve<LLT, Rhs>
|
||||
solve(const MatrixBase<Rhs>& b) const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LLT is not initialized.");
|
||||
eigen_assert(m_matrix.rows()==b.rows()
|
||||
&& "LLT::solve(): invalid number of rows of the right hand side matrix b");
|
||||
return Solve<LLT, Rhs>(*this, b.derived());
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
void solveInPlace(const MatrixBase<Derived> &bAndX) const;
|
||||
|
||||
template<typename InputType>
|
||||
LLT& compute(const EigenBase<InputType>& matrix);
|
||||
|
||||
/** \returns an estimate of the reciprocal condition number of the matrix of
|
||||
* which \c *this is the Cholesky decomposition.
|
||||
*/
|
||||
RealScalar rcond() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LLT is not initialized.");
|
||||
eigen_assert(m_info == Success && "LLT failed because matrix appears to be negative");
|
||||
return internal::rcond_estimate_helper(m_l1_norm, *this);
|
||||
}
|
||||
|
||||
/** \returns the LLT decomposition matrix
|
||||
*
|
||||
* TODO: document the storage layout
|
||||
*/
|
||||
inline const MatrixType& matrixLLT() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LLT is not initialized.");
|
||||
return m_matrix;
|
||||
}
|
||||
|
||||
MatrixType reconstructedMatrix() const;
|
||||
|
||||
|
||||
/** \brief Reports whether previous computation was successful.
|
||||
*
|
||||
* \returns \c Success if computation was succesful,
|
||||
* \c NumericalIssue if the matrix.appears not to be positive definite.
|
||||
*/
|
||||
ComputationInfo info() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LLT is not initialized.");
|
||||
return m_info;
|
||||
}
|
||||
|
||||
/** \returns the adjoint of \c *this, that is, a const reference to the decomposition itself as the underlying matrix is self-adjoint.
|
||||
*
|
||||
* This method is provided for compatibility with other matrix decompositions, thus enabling generic code such as:
|
||||
* \code x = decomposition.adjoint().solve(b) \endcode
|
||||
*/
|
||||
const LLT& adjoint() const { return *this; };
|
||||
|
||||
inline Index rows() const { return m_matrix.rows(); }
|
||||
inline Index cols() const { return m_matrix.cols(); }
|
||||
|
||||
template<typename VectorType>
|
||||
LLT rankUpdate(const VectorType& vec, const RealScalar& sigma = 1);
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename RhsType, typename DstType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void _solve_impl(const RhsType &rhs, DstType &dst) const;
|
||||
#endif
|
||||
|
||||
protected:
|
||||
|
||||
static void check_template_parameters()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
|
||||
}
|
||||
|
||||
/** \internal
|
||||
* Used to compute and store L
|
||||
* The strict upper part is not used and even not initialized.
|
||||
*/
|
||||
MatrixType m_matrix;
|
||||
RealScalar m_l1_norm;
|
||||
bool m_isInitialized;
|
||||
ComputationInfo m_info;
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Scalar, int UpLo> struct llt_inplace;
|
||||
|
||||
template<typename MatrixType, typename VectorType>
|
||||
static Index llt_rank_update_lower(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma)
|
||||
{
|
||||
using std::sqrt;
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef typename MatrixType::RealScalar RealScalar;
|
||||
typedef typename MatrixType::ColXpr ColXpr;
|
||||
typedef typename internal::remove_all<ColXpr>::type ColXprCleaned;
|
||||
typedef typename ColXprCleaned::SegmentReturnType ColXprSegment;
|
||||
typedef Matrix<Scalar,Dynamic,1> TempVectorType;
|
||||
typedef typename TempVectorType::SegmentReturnType TempVecSegment;
|
||||
|
||||
Index n = mat.cols();
|
||||
eigen_assert(mat.rows()==n && vec.size()==n);
|
||||
|
||||
TempVectorType temp;
|
||||
|
||||
if(sigma>0)
|
||||
{
|
||||
// This version is based on Givens rotations.
|
||||
// It is faster than the other one below, but only works for updates,
|
||||
// i.e., for sigma > 0
|
||||
temp = sqrt(sigma) * vec;
|
||||
|
||||
for(Index i=0; i<n; ++i)
|
||||
{
|
||||
JacobiRotation<Scalar> g;
|
||||
g.makeGivens(mat(i,i), -temp(i), &mat(i,i));
|
||||
|
||||
Index rs = n-i-1;
|
||||
if(rs>0)
|
||||
{
|
||||
ColXprSegment x(mat.col(i).tail(rs));
|
||||
TempVecSegment y(temp.tail(rs));
|
||||
apply_rotation_in_the_plane(x, y, g);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
temp = vec;
|
||||
RealScalar beta = 1;
|
||||
for(Index j=0; j<n; ++j)
|
||||
{
|
||||
RealScalar Ljj = numext::real(mat.coeff(j,j));
|
||||
RealScalar dj = numext::abs2(Ljj);
|
||||
Scalar wj = temp.coeff(j);
|
||||
RealScalar swj2 = sigma*numext::abs2(wj);
|
||||
RealScalar gamma = dj*beta + swj2;
|
||||
|
||||
RealScalar x = dj + swj2/beta;
|
||||
if (x<=RealScalar(0))
|
||||
return j;
|
||||
RealScalar nLjj = sqrt(x);
|
||||
mat.coeffRef(j,j) = nLjj;
|
||||
beta += swj2/dj;
|
||||
|
||||
// Update the terms of L
|
||||
Index rs = n-j-1;
|
||||
if(rs)
|
||||
{
|
||||
temp.tail(rs) -= (wj/Ljj) * mat.col(j).tail(rs);
|
||||
if(gamma != 0)
|
||||
mat.col(j).tail(rs) = (nLjj/Ljj) * mat.col(j).tail(rs) + (nLjj * sigma*numext::conj(wj)/gamma)*temp.tail(rs);
|
||||
}
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
template<typename Scalar> struct llt_inplace<Scalar, Lower>
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
template<typename MatrixType>
|
||||
static Index unblocked(MatrixType& mat)
|
||||
{
|
||||
using std::sqrt;
|
||||
|
||||
eigen_assert(mat.rows()==mat.cols());
|
||||
const Index size = mat.rows();
|
||||
for(Index k = 0; k < size; ++k)
|
||||
{
|
||||
Index rs = size-k-1; // remaining size
|
||||
|
||||
Block<MatrixType,Dynamic,1> A21(mat,k+1,k,rs,1);
|
||||
Block<MatrixType,1,Dynamic> A10(mat,k,0,1,k);
|
||||
Block<MatrixType,Dynamic,Dynamic> A20(mat,k+1,0,rs,k);
|
||||
|
||||
RealScalar x = numext::real(mat.coeff(k,k));
|
||||
if (k>0) x -= A10.squaredNorm();
|
||||
if (x<=RealScalar(0))
|
||||
return k;
|
||||
mat.coeffRef(k,k) = x = sqrt(x);
|
||||
if (k>0 && rs>0) A21.noalias() -= A20 * A10.adjoint();
|
||||
if (rs>0) A21 /= x;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
template<typename MatrixType>
|
||||
static Index blocked(MatrixType& m)
|
||||
{
|
||||
eigen_assert(m.rows()==m.cols());
|
||||
Index size = m.rows();
|
||||
if(size<32)
|
||||
return unblocked(m);
|
||||
|
||||
Index blockSize = size/8;
|
||||
blockSize = (blockSize/16)*16;
|
||||
blockSize = (std::min)((std::max)(blockSize,Index(8)), Index(128));
|
||||
|
||||
for (Index k=0; k<size; k+=blockSize)
|
||||
{
|
||||
// partition the matrix:
|
||||
// A00 | - | -
|
||||
// lu = A10 | A11 | -
|
||||
// A20 | A21 | A22
|
||||
Index bs = (std::min)(blockSize, size-k);
|
||||
Index rs = size - k - bs;
|
||||
Block<MatrixType,Dynamic,Dynamic> A11(m,k, k, bs,bs);
|
||||
Block<MatrixType,Dynamic,Dynamic> A21(m,k+bs,k, rs,bs);
|
||||
Block<MatrixType,Dynamic,Dynamic> A22(m,k+bs,k+bs,rs,rs);
|
||||
|
||||
Index ret;
|
||||
if((ret=unblocked(A11))>=0) return k+ret;
|
||||
if(rs>0) A11.adjoint().template triangularView<Upper>().template solveInPlace<OnTheRight>(A21);
|
||||
if(rs>0) A22.template selfadjointView<Lower>().rankUpdate(A21,typename NumTraits<RealScalar>::Literal(-1)); // bottleneck
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
template<typename MatrixType, typename VectorType>
|
||||
static Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma)
|
||||
{
|
||||
return Eigen::internal::llt_rank_update_lower(mat, vec, sigma);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Scalar> struct llt_inplace<Scalar, Upper>
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
|
||||
template<typename MatrixType>
|
||||
static EIGEN_STRONG_INLINE Index unblocked(MatrixType& mat)
|
||||
{
|
||||
Transpose<MatrixType> matt(mat);
|
||||
return llt_inplace<Scalar, Lower>::unblocked(matt);
|
||||
}
|
||||
template<typename MatrixType>
|
||||
static EIGEN_STRONG_INLINE Index blocked(MatrixType& mat)
|
||||
{
|
||||
Transpose<MatrixType> matt(mat);
|
||||
return llt_inplace<Scalar, Lower>::blocked(matt);
|
||||
}
|
||||
template<typename MatrixType, typename VectorType>
|
||||
static Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma)
|
||||
{
|
||||
Transpose<MatrixType> matt(mat);
|
||||
return llt_inplace<Scalar, Lower>::rankUpdate(matt, vec.conjugate(), sigma);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename MatrixType> struct LLT_Traits<MatrixType,Lower>
|
||||
{
|
||||
typedef const TriangularView<const MatrixType, Lower> MatrixL;
|
||||
typedef const TriangularView<const typename MatrixType::AdjointReturnType, Upper> MatrixU;
|
||||
static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); }
|
||||
static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); }
|
||||
static bool inplace_decomposition(MatrixType& m)
|
||||
{ return llt_inplace<typename MatrixType::Scalar, Lower>::blocked(m)==-1; }
|
||||
};
|
||||
|
||||
template<typename MatrixType> struct LLT_Traits<MatrixType,Upper>
|
||||
{
|
||||
typedef const TriangularView<const typename MatrixType::AdjointReturnType, Lower> MatrixL;
|
||||
typedef const TriangularView<const MatrixType, Upper> MatrixU;
|
||||
static inline MatrixL getL(const MatrixType& m) { return MatrixL(m.adjoint()); }
|
||||
static inline MatrixU getU(const MatrixType& m) { return MatrixU(m); }
|
||||
static bool inplace_decomposition(MatrixType& m)
|
||||
{ return llt_inplace<typename MatrixType::Scalar, Upper>::blocked(m)==-1; }
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** Computes / recomputes the Cholesky decomposition A = LL^* = U^*U of \a matrix
|
||||
*
|
||||
* \returns a reference to *this
|
||||
*
|
||||
* Example: \include TutorialLinAlgComputeTwice.cpp
|
||||
* Output: \verbinclude TutorialLinAlgComputeTwice.out
|
||||
*/
|
||||
template<typename MatrixType, int _UpLo>
|
||||
template<typename InputType>
|
||||
LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const EigenBase<InputType>& a)
|
||||
{
|
||||
check_template_parameters();
|
||||
|
||||
eigen_assert(a.rows()==a.cols());
|
||||
const Index size = a.rows();
|
||||
m_matrix.resize(size, size);
|
||||
if (!internal::is_same_dense(m_matrix, a.derived()))
|
||||
m_matrix = a.derived();
|
||||
|
||||
// Compute matrix L1 norm = max abs column sum.
|
||||
m_l1_norm = RealScalar(0);
|
||||
// TODO move this code to SelfAdjointView
|
||||
for (Index col = 0; col < size; ++col) {
|
||||
RealScalar abs_col_sum;
|
||||
if (_UpLo == Lower)
|
||||
abs_col_sum = m_matrix.col(col).tail(size - col).template lpNorm<1>() + m_matrix.row(col).head(col).template lpNorm<1>();
|
||||
else
|
||||
abs_col_sum = m_matrix.col(col).head(col).template lpNorm<1>() + m_matrix.row(col).tail(size - col).template lpNorm<1>();
|
||||
if (abs_col_sum > m_l1_norm)
|
||||
m_l1_norm = abs_col_sum;
|
||||
}
|
||||
|
||||
m_isInitialized = true;
|
||||
bool ok = Traits::inplace_decomposition(m_matrix);
|
||||
m_info = ok ? Success : NumericalIssue;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
/** Performs a rank one update (or dowdate) of the current decomposition.
|
||||
* If A = LL^* before the rank one update,
|
||||
* then after it we have LL^* = A + sigma * v v^* where \a v must be a vector
|
||||
* of same dimension.
|
||||
*/
|
||||
template<typename _MatrixType, int _UpLo>
|
||||
template<typename VectorType>
|
||||
LLT<_MatrixType,_UpLo> LLT<_MatrixType,_UpLo>::rankUpdate(const VectorType& v, const RealScalar& sigma)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorType);
|
||||
eigen_assert(v.size()==m_matrix.cols());
|
||||
eigen_assert(m_isInitialized);
|
||||
if(internal::llt_inplace<typename MatrixType::Scalar, UpLo>::rankUpdate(m_matrix,v,sigma)>=0)
|
||||
m_info = NumericalIssue;
|
||||
else
|
||||
m_info = Success;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename _MatrixType,int _UpLo>
|
||||
template<typename RhsType, typename DstType>
|
||||
void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const
|
||||
{
|
||||
dst = rhs;
|
||||
solveInPlace(dst);
|
||||
}
|
||||
#endif
|
||||
|
||||
/** \internal use x = llt_object.solve(x);
|
||||
*
|
||||
* This is the \em in-place version of solve().
|
||||
*
|
||||
* \param bAndX represents both the right-hand side matrix b and result x.
|
||||
*
|
||||
* This version avoids a copy when the right hand side matrix b is not needed anymore.
|
||||
*
|
||||
* \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here.
|
||||
* This function will const_cast it, so constness isn't honored here.
|
||||
*
|
||||
* \sa LLT::solve(), MatrixBase::llt()
|
||||
*/
|
||||
template<typename MatrixType, int _UpLo>
|
||||
template<typename Derived>
|
||||
void LLT<MatrixType,_UpLo>::solveInPlace(const MatrixBase<Derived> &bAndX) const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LLT is not initialized.");
|
||||
eigen_assert(m_matrix.rows()==bAndX.rows());
|
||||
matrixL().solveInPlace(bAndX);
|
||||
matrixU().solveInPlace(bAndX);
|
||||
}
|
||||
|
||||
/** \returns the matrix represented by the decomposition,
|
||||
* i.e., it returns the product: L L^*.
|
||||
* This function is provided for debug purpose. */
|
||||
template<typename MatrixType, int _UpLo>
|
||||
MatrixType LLT<MatrixType,_UpLo>::reconstructedMatrix() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "LLT is not initialized.");
|
||||
return matrixL() * matrixL().adjoint().toDenseMatrix();
|
||||
}
|
||||
|
||||
/** \cholesky_module
|
||||
* \returns the LLT decomposition of \c *this
|
||||
* \sa SelfAdjointView::llt()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const LLT<typename MatrixBase<Derived>::PlainObject>
|
||||
MatrixBase<Derived>::llt() const
|
||||
{
|
||||
return LLT<PlainObject>(derived());
|
||||
}
|
||||
|
||||
/** \cholesky_module
|
||||
* \returns the LLT decomposition of \c *this
|
||||
* \sa SelfAdjointView::llt()
|
||||
*/
|
||||
template<typename MatrixType, unsigned int UpLo>
|
||||
inline const LLT<typename SelfAdjointView<MatrixType, UpLo>::PlainObject, UpLo>
|
||||
SelfAdjointView<MatrixType, UpLo>::llt() const
|
||||
{
|
||||
return LLT<PlainObject,UpLo>(m_matrix);
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_LLT_H
|
||||
99
external/include/eigen3/Eigen/src/Cholesky/LLT_LAPACKE.h
vendored
Normal file
99
external/include/eigen3/Eigen/src/Cholesky/LLT_LAPACKE.h
vendored
Normal file
@@ -0,0 +1,99 @@
|
||||
/*
|
||||
Copyright (c) 2011, Intel Corporation. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
********************************************************************************
|
||||
* Content : Eigen bindings to LAPACKe
|
||||
* LLt decomposition based on LAPACKE_?potrf function.
|
||||
********************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef EIGEN_LLT_LAPACKE_H
|
||||
#define EIGEN_LLT_LAPACKE_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Scalar> struct lapacke_llt;
|
||||
|
||||
#define EIGEN_LAPACKE_LLT(EIGTYPE, BLASTYPE, LAPACKE_PREFIX) \
|
||||
template<> struct lapacke_llt<EIGTYPE> \
|
||||
{ \
|
||||
template<typename MatrixType> \
|
||||
static inline Index potrf(MatrixType& m, char uplo) \
|
||||
{ \
|
||||
lapack_int matrix_order; \
|
||||
lapack_int size, lda, info, StorageOrder; \
|
||||
EIGTYPE* a; \
|
||||
eigen_assert(m.rows()==m.cols()); \
|
||||
/* Set up parameters for ?potrf */ \
|
||||
size = convert_index<lapack_int>(m.rows()); \
|
||||
StorageOrder = MatrixType::Flags&RowMajorBit?RowMajor:ColMajor; \
|
||||
matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \
|
||||
a = &(m.coeffRef(0,0)); \
|
||||
lda = convert_index<lapack_int>(m.outerStride()); \
|
||||
\
|
||||
info = LAPACKE_##LAPACKE_PREFIX##potrf( matrix_order, uplo, size, (BLASTYPE*)a, lda ); \
|
||||
info = (info==0) ? -1 : info>0 ? info-1 : size; \
|
||||
return info; \
|
||||
} \
|
||||
}; \
|
||||
template<> struct llt_inplace<EIGTYPE, Lower> \
|
||||
{ \
|
||||
template<typename MatrixType> \
|
||||
static Index blocked(MatrixType& m) \
|
||||
{ \
|
||||
return lapacke_llt<EIGTYPE>::potrf(m, 'L'); \
|
||||
} \
|
||||
template<typename MatrixType, typename VectorType> \
|
||||
static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \
|
||||
{ return Eigen::internal::llt_rank_update_lower(mat, vec, sigma); } \
|
||||
}; \
|
||||
template<> struct llt_inplace<EIGTYPE, Upper> \
|
||||
{ \
|
||||
template<typename MatrixType> \
|
||||
static Index blocked(MatrixType& m) \
|
||||
{ \
|
||||
return lapacke_llt<EIGTYPE>::potrf(m, 'U'); \
|
||||
} \
|
||||
template<typename MatrixType, typename VectorType> \
|
||||
static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \
|
||||
{ \
|
||||
Transpose<MatrixType> matt(mat); \
|
||||
return llt_inplace<EIGTYPE, Lower>::rankUpdate(matt, vec.conjugate(), sigma); \
|
||||
} \
|
||||
};
|
||||
|
||||
EIGEN_LAPACKE_LLT(double, double, d)
|
||||
EIGEN_LAPACKE_LLT(float, float, s)
|
||||
EIGEN_LAPACKE_LLT(dcomplex, lapack_complex_double, z)
|
||||
EIGEN_LAPACKE_LLT(scomplex, lapack_complex_float, c)
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_LLT_LAPACKE_H
|
||||
639
external/include/eigen3/Eigen/src/CholmodSupport/CholmodSupport.h
vendored
Normal file
639
external/include/eigen3/Eigen/src/CholmodSupport/CholmodSupport.h
vendored
Normal file
@@ -0,0 +1,639 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_CHOLMODSUPPORT_H
|
||||
#define EIGEN_CHOLMODSUPPORT_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Scalar> struct cholmod_configure_matrix;
|
||||
|
||||
template<> struct cholmod_configure_matrix<double> {
|
||||
template<typename CholmodType>
|
||||
static void run(CholmodType& mat) {
|
||||
mat.xtype = CHOLMOD_REAL;
|
||||
mat.dtype = CHOLMOD_DOUBLE;
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct cholmod_configure_matrix<std::complex<double> > {
|
||||
template<typename CholmodType>
|
||||
static void run(CholmodType& mat) {
|
||||
mat.xtype = CHOLMOD_COMPLEX;
|
||||
mat.dtype = CHOLMOD_DOUBLE;
|
||||
}
|
||||
};
|
||||
|
||||
// Other scalar types are not yet suppotred by Cholmod
|
||||
// template<> struct cholmod_configure_matrix<float> {
|
||||
// template<typename CholmodType>
|
||||
// static void run(CholmodType& mat) {
|
||||
// mat.xtype = CHOLMOD_REAL;
|
||||
// mat.dtype = CHOLMOD_SINGLE;
|
||||
// }
|
||||
// };
|
||||
//
|
||||
// template<> struct cholmod_configure_matrix<std::complex<float> > {
|
||||
// template<typename CholmodType>
|
||||
// static void run(CholmodType& mat) {
|
||||
// mat.xtype = CHOLMOD_COMPLEX;
|
||||
// mat.dtype = CHOLMOD_SINGLE;
|
||||
// }
|
||||
// };
|
||||
|
||||
} // namespace internal
|
||||
|
||||
/** Wraps the Eigen sparse matrix \a mat into a Cholmod sparse matrix object.
|
||||
* Note that the data are shared.
|
||||
*/
|
||||
template<typename _Scalar, int _Options, typename _StorageIndex>
|
||||
cholmod_sparse viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_StorageIndex> > mat)
|
||||
{
|
||||
cholmod_sparse res;
|
||||
res.nzmax = mat.nonZeros();
|
||||
res.nrow = mat.rows();
|
||||
res.ncol = mat.cols();
|
||||
res.p = mat.outerIndexPtr();
|
||||
res.i = mat.innerIndexPtr();
|
||||
res.x = mat.valuePtr();
|
||||
res.z = 0;
|
||||
res.sorted = 1;
|
||||
if(mat.isCompressed())
|
||||
{
|
||||
res.packed = 1;
|
||||
res.nz = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
res.packed = 0;
|
||||
res.nz = mat.innerNonZeroPtr();
|
||||
}
|
||||
|
||||
res.dtype = 0;
|
||||
res.stype = -1;
|
||||
|
||||
if (internal::is_same<_StorageIndex,int>::value)
|
||||
{
|
||||
res.itype = CHOLMOD_INT;
|
||||
}
|
||||
else if (internal::is_same<_StorageIndex,long>::value)
|
||||
{
|
||||
res.itype = CHOLMOD_LONG;
|
||||
}
|
||||
else
|
||||
{
|
||||
eigen_assert(false && "Index type not supported yet");
|
||||
}
|
||||
|
||||
// setup res.xtype
|
||||
internal::cholmod_configure_matrix<_Scalar>::run(res);
|
||||
|
||||
res.stype = 0;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
template<typename _Scalar, int _Options, typename _Index>
|
||||
const cholmod_sparse viewAsCholmod(const SparseMatrix<_Scalar,_Options,_Index>& mat)
|
||||
{
|
||||
cholmod_sparse res = viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_Index> >(mat.const_cast_derived()));
|
||||
return res;
|
||||
}
|
||||
|
||||
template<typename _Scalar, int _Options, typename _Index>
|
||||
const cholmod_sparse viewAsCholmod(const SparseVector<_Scalar,_Options,_Index>& mat)
|
||||
{
|
||||
cholmod_sparse res = viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_Index> >(mat.const_cast_derived()));
|
||||
return res;
|
||||
}
|
||||
|
||||
/** Returns a view of the Eigen sparse matrix \a mat as Cholmod sparse matrix.
|
||||
* The data are not copied but shared. */
|
||||
template<typename _Scalar, int _Options, typename _Index, unsigned int UpLo>
|
||||
cholmod_sparse viewAsCholmod(const SparseSelfAdjointView<const SparseMatrix<_Scalar,_Options,_Index>, UpLo>& mat)
|
||||
{
|
||||
cholmod_sparse res = viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_Index> >(mat.matrix().const_cast_derived()));
|
||||
|
||||
if(UpLo==Upper) res.stype = 1;
|
||||
if(UpLo==Lower) res.stype = -1;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/** Returns a view of the Eigen \b dense matrix \a mat as Cholmod dense matrix.
|
||||
* The data are not copied but shared. */
|
||||
template<typename Derived>
|
||||
cholmod_dense viewAsCholmod(MatrixBase<Derived>& mat)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((internal::traits<Derived>::Flags&RowMajorBit)==0,THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
|
||||
cholmod_dense res;
|
||||
res.nrow = mat.rows();
|
||||
res.ncol = mat.cols();
|
||||
res.nzmax = res.nrow * res.ncol;
|
||||
res.d = Derived::IsVectorAtCompileTime ? mat.derived().size() : mat.derived().outerStride();
|
||||
res.x = (void*)(mat.derived().data());
|
||||
res.z = 0;
|
||||
|
||||
internal::cholmod_configure_matrix<Scalar>::run(res);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/** Returns a view of the Cholmod sparse matrix \a cm as an Eigen sparse matrix.
|
||||
* The data are not copied but shared. */
|
||||
template<typename Scalar, int Flags, typename StorageIndex>
|
||||
MappedSparseMatrix<Scalar,Flags,StorageIndex> viewAsEigen(cholmod_sparse& cm)
|
||||
{
|
||||
return MappedSparseMatrix<Scalar,Flags,StorageIndex>
|
||||
(cm.nrow, cm.ncol, static_cast<StorageIndex*>(cm.p)[cm.ncol],
|
||||
static_cast<StorageIndex*>(cm.p), static_cast<StorageIndex*>(cm.i),static_cast<Scalar*>(cm.x) );
|
||||
}
|
||||
|
||||
enum CholmodMode {
|
||||
CholmodAuto, CholmodSimplicialLLt, CholmodSupernodalLLt, CholmodLDLt
|
||||
};
|
||||
|
||||
|
||||
/** \ingroup CholmodSupport_Module
|
||||
* \class CholmodBase
|
||||
* \brief The base class for the direct Cholesky factorization of Cholmod
|
||||
* \sa class CholmodSupernodalLLT, class CholmodSimplicialLDLT, class CholmodSimplicialLLT
|
||||
*/
|
||||
template<typename _MatrixType, int _UpLo, typename Derived>
|
||||
class CholmodBase : public SparseSolverBase<Derived>
|
||||
{
|
||||
protected:
|
||||
typedef SparseSolverBase<Derived> Base;
|
||||
using Base::derived;
|
||||
using Base::m_isInitialized;
|
||||
public:
|
||||
typedef _MatrixType MatrixType;
|
||||
enum { UpLo = _UpLo };
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef typename MatrixType::RealScalar RealScalar;
|
||||
typedef MatrixType CholMatrixType;
|
||||
typedef typename MatrixType::StorageIndex StorageIndex;
|
||||
enum {
|
||||
ColsAtCompileTime = MatrixType::ColsAtCompileTime,
|
||||
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
|
||||
};
|
||||
|
||||
public:
|
||||
|
||||
CholmodBase()
|
||||
: m_cholmodFactor(0), m_info(Success), m_factorizationIsOk(false), m_analysisIsOk(false)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<double,RealScalar>::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY);
|
||||
m_shiftOffset[0] = m_shiftOffset[1] = 0.0;
|
||||
cholmod_start(&m_cholmod);
|
||||
}
|
||||
|
||||
explicit CholmodBase(const MatrixType& matrix)
|
||||
: m_cholmodFactor(0), m_info(Success), m_factorizationIsOk(false), m_analysisIsOk(false)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<double,RealScalar>::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY);
|
||||
m_shiftOffset[0] = m_shiftOffset[1] = 0.0;
|
||||
cholmod_start(&m_cholmod);
|
||||
compute(matrix);
|
||||
}
|
||||
|
||||
~CholmodBase()
|
||||
{
|
||||
if(m_cholmodFactor)
|
||||
cholmod_free_factor(&m_cholmodFactor, &m_cholmod);
|
||||
cholmod_finish(&m_cholmod);
|
||||
}
|
||||
|
||||
inline StorageIndex cols() const { return internal::convert_index<StorageIndex, Index>(m_cholmodFactor->n); }
|
||||
inline StorageIndex rows() const { return internal::convert_index<StorageIndex, Index>(m_cholmodFactor->n); }
|
||||
|
||||
/** \brief Reports whether previous computation was successful.
|
||||
*
|
||||
* \returns \c Success if computation was succesful,
|
||||
* \c NumericalIssue if the matrix.appears to be negative.
|
||||
*/
|
||||
ComputationInfo info() const
|
||||
{
|
||||
eigen_assert(m_isInitialized && "Decomposition is not initialized.");
|
||||
return m_info;
|
||||
}
|
||||
|
||||
/** Computes the sparse Cholesky decomposition of \a matrix */
|
||||
Derived& compute(const MatrixType& matrix)
|
||||
{
|
||||
analyzePattern(matrix);
|
||||
factorize(matrix);
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** Performs a symbolic decomposition on the sparsity pattern of \a matrix.
|
||||
*
|
||||
* This function is particularly useful when solving for several problems having the same structure.
|
||||
*
|
||||
* \sa factorize()
|
||||
*/
|
||||
void analyzePattern(const MatrixType& matrix)
|
||||
{
|
||||
if(m_cholmodFactor)
|
||||
{
|
||||
cholmod_free_factor(&m_cholmodFactor, &m_cholmod);
|
||||
m_cholmodFactor = 0;
|
||||
}
|
||||
cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView<UpLo>());
|
||||
m_cholmodFactor = cholmod_analyze(&A, &m_cholmod);
|
||||
|
||||
this->m_isInitialized = true;
|
||||
this->m_info = Success;
|
||||
m_analysisIsOk = true;
|
||||
m_factorizationIsOk = false;
|
||||
}
|
||||
|
||||
/** Performs a numeric decomposition of \a matrix
|
||||
*
|
||||
* The given matrix must have the same sparsity pattern as the matrix on which the symbolic decomposition has been performed.
|
||||
*
|
||||
* \sa analyzePattern()
|
||||
*/
|
||||
void factorize(const MatrixType& matrix)
|
||||
{
|
||||
eigen_assert(m_analysisIsOk && "You must first call analyzePattern()");
|
||||
cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView<UpLo>());
|
||||
cholmod_factorize_p(&A, m_shiftOffset, 0, 0, m_cholmodFactor, &m_cholmod);
|
||||
|
||||
// If the factorization failed, minor is the column at which it did. On success minor == n.
|
||||
this->m_info = (m_cholmodFactor->minor == m_cholmodFactor->n ? Success : NumericalIssue);
|
||||
m_factorizationIsOk = true;
|
||||
}
|
||||
|
||||
/** Returns a reference to the Cholmod's configuration structure to get a full control over the performed operations.
|
||||
* See the Cholmod user guide for details. */
|
||||
cholmod_common& cholmod() { return m_cholmod; }
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** \internal */
|
||||
template<typename Rhs,typename Dest>
|
||||
void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const
|
||||
{
|
||||
eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()");
|
||||
const Index size = m_cholmodFactor->n;
|
||||
EIGEN_UNUSED_VARIABLE(size);
|
||||
eigen_assert(size==b.rows());
|
||||
|
||||
// Cholmod needs column-major stoarge without inner-stride, which corresponds to the default behavior of Ref.
|
||||
Ref<const Matrix<typename Rhs::Scalar,Dynamic,Dynamic,ColMajor> > b_ref(b.derived());
|
||||
|
||||
cholmod_dense b_cd = viewAsCholmod(b_ref);
|
||||
cholmod_dense* x_cd = cholmod_solve(CHOLMOD_A, m_cholmodFactor, &b_cd, &m_cholmod);
|
||||
if(!x_cd)
|
||||
{
|
||||
this->m_info = NumericalIssue;
|
||||
return;
|
||||
}
|
||||
// TODO optimize this copy by swapping when possible (be careful with alignment, etc.)
|
||||
dest = Matrix<Scalar,Dest::RowsAtCompileTime,Dest::ColsAtCompileTime>::Map(reinterpret_cast<Scalar*>(x_cd->x),b.rows(),b.cols());
|
||||
cholmod_free_dense(&x_cd, &m_cholmod);
|
||||
}
|
||||
|
||||
/** \internal */
|
||||
template<typename RhsDerived, typename DestDerived>
|
||||
void _solve_impl(const SparseMatrixBase<RhsDerived> &b, SparseMatrixBase<DestDerived> &dest) const
|
||||
{
|
||||
eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()");
|
||||
const Index size = m_cholmodFactor->n;
|
||||
EIGEN_UNUSED_VARIABLE(size);
|
||||
eigen_assert(size==b.rows());
|
||||
|
||||
// note: cs stands for Cholmod Sparse
|
||||
Ref<SparseMatrix<typename RhsDerived::Scalar,ColMajor,typename RhsDerived::StorageIndex> > b_ref(b.const_cast_derived());
|
||||
cholmod_sparse b_cs = viewAsCholmod(b_ref);
|
||||
cholmod_sparse* x_cs = cholmod_spsolve(CHOLMOD_A, m_cholmodFactor, &b_cs, &m_cholmod);
|
||||
if(!x_cs)
|
||||
{
|
||||
this->m_info = NumericalIssue;
|
||||
return;
|
||||
}
|
||||
// TODO optimize this copy by swapping when possible (be careful with alignment, etc.)
|
||||
dest.derived() = viewAsEigen<typename DestDerived::Scalar,ColMajor,typename DestDerived::StorageIndex>(*x_cs);
|
||||
cholmod_free_sparse(&x_cs, &m_cholmod);
|
||||
}
|
||||
#endif // EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
|
||||
/** Sets the shift parameter that will be used to adjust the diagonal coefficients during the numerical factorization.
|
||||
*
|
||||
* During the numerical factorization, an offset term is added to the diagonal coefficients:\n
|
||||
* \c d_ii = \a offset + \c d_ii
|
||||
*
|
||||
* The default is \a offset=0.
|
||||
*
|
||||
* \returns a reference to \c *this.
|
||||
*/
|
||||
Derived& setShift(const RealScalar& offset)
|
||||
{
|
||||
m_shiftOffset[0] = double(offset);
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** \returns the determinant of the underlying matrix from the current factorization */
|
||||
Scalar determinant() const
|
||||
{
|
||||
using std::exp;
|
||||
return exp(logDeterminant());
|
||||
}
|
||||
|
||||
/** \returns the log determinant of the underlying matrix from the current factorization */
|
||||
Scalar logDeterminant() const
|
||||
{
|
||||
using std::log;
|
||||
using numext::real;
|
||||
eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()");
|
||||
|
||||
RealScalar logDet = 0;
|
||||
Scalar *x = static_cast<Scalar*>(m_cholmodFactor->x);
|
||||
if (m_cholmodFactor->is_super)
|
||||
{
|
||||
// Supernodal factorization stored as a packed list of dense column-major blocs,
|
||||
// as described by the following structure:
|
||||
|
||||
// super[k] == index of the first column of the j-th super node
|
||||
StorageIndex *super = static_cast<StorageIndex*>(m_cholmodFactor->super);
|
||||
// pi[k] == offset to the description of row indices
|
||||
StorageIndex *pi = static_cast<StorageIndex*>(m_cholmodFactor->pi);
|
||||
// px[k] == offset to the respective dense block
|
||||
StorageIndex *px = static_cast<StorageIndex*>(m_cholmodFactor->px);
|
||||
|
||||
Index nb_super_nodes = m_cholmodFactor->nsuper;
|
||||
for (Index k=0; k < nb_super_nodes; ++k)
|
||||
{
|
||||
StorageIndex ncols = super[k + 1] - super[k];
|
||||
StorageIndex nrows = pi[k + 1] - pi[k];
|
||||
|
||||
Map<const Array<Scalar,1,Dynamic>, 0, InnerStride<> > sk(x + px[k], ncols, InnerStride<>(nrows+1));
|
||||
logDet += sk.real().log().sum();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Simplicial factorization stored as standard CSC matrix.
|
||||
StorageIndex *p = static_cast<StorageIndex*>(m_cholmodFactor->p);
|
||||
Index size = m_cholmodFactor->n;
|
||||
for (Index k=0; k<size; ++k)
|
||||
logDet += log(real( x[p[k]] ));
|
||||
}
|
||||
if (m_cholmodFactor->is_ll)
|
||||
logDet *= 2.0;
|
||||
return logDet;
|
||||
};
|
||||
|
||||
template<typename Stream>
|
||||
void dumpMemory(Stream& /*s*/)
|
||||
{}
|
||||
|
||||
protected:
|
||||
mutable cholmod_common m_cholmod;
|
||||
cholmod_factor* m_cholmodFactor;
|
||||
double m_shiftOffset[2];
|
||||
mutable ComputationInfo m_info;
|
||||
int m_factorizationIsOk;
|
||||
int m_analysisIsOk;
|
||||
};
|
||||
|
||||
/** \ingroup CholmodSupport_Module
|
||||
* \class CholmodSimplicialLLT
|
||||
* \brief A simplicial direct Cholesky (LLT) factorization and solver based on Cholmod
|
||||
*
|
||||
* This class allows to solve for A.X = B sparse linear problems via a simplicial LL^T Cholesky factorization
|
||||
* using the Cholmod library.
|
||||
* This simplicial variant is equivalent to Eigen's built-in SimplicialLLT class. Therefore, it has little practical interest.
|
||||
* The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices
|
||||
* X and B can be either dense or sparse.
|
||||
*
|
||||
* \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
|
||||
* \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
|
||||
* or Upper. Default is Lower.
|
||||
*
|
||||
* \implsparsesolverconcept
|
||||
*
|
||||
* This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
|
||||
*
|
||||
* \warning Only double precision real and complex scalar types are supported by Cholmod.
|
||||
*
|
||||
* \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLLT
|
||||
*/
|
||||
template<typename _MatrixType, int _UpLo = Lower>
|
||||
class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLLT<_MatrixType, _UpLo> >
|
||||
{
|
||||
typedef CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLLT> Base;
|
||||
using Base::m_cholmod;
|
||||
|
||||
public:
|
||||
|
||||
typedef _MatrixType MatrixType;
|
||||
|
||||
CholmodSimplicialLLT() : Base() { init(); }
|
||||
|
||||
CholmodSimplicialLLT(const MatrixType& matrix) : Base()
|
||||
{
|
||||
init();
|
||||
this->compute(matrix);
|
||||
}
|
||||
|
||||
~CholmodSimplicialLLT() {}
|
||||
protected:
|
||||
void init()
|
||||
{
|
||||
m_cholmod.final_asis = 0;
|
||||
m_cholmod.supernodal = CHOLMOD_SIMPLICIAL;
|
||||
m_cholmod.final_ll = 1;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/** \ingroup CholmodSupport_Module
|
||||
* \class CholmodSimplicialLDLT
|
||||
* \brief A simplicial direct Cholesky (LDLT) factorization and solver based on Cholmod
|
||||
*
|
||||
* This class allows to solve for A.X = B sparse linear problems via a simplicial LDL^T Cholesky factorization
|
||||
* using the Cholmod library.
|
||||
* This simplicial variant is equivalent to Eigen's built-in SimplicialLDLT class. Therefore, it has little practical interest.
|
||||
* The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices
|
||||
* X and B can be either dense or sparse.
|
||||
*
|
||||
* \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
|
||||
* \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
|
||||
* or Upper. Default is Lower.
|
||||
*
|
||||
* \implsparsesolverconcept
|
||||
*
|
||||
* This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
|
||||
*
|
||||
* \warning Only double precision real and complex scalar types are supported by Cholmod.
|
||||
*
|
||||
* \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLDLT
|
||||
*/
|
||||
template<typename _MatrixType, int _UpLo = Lower>
|
||||
class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLDLT<_MatrixType, _UpLo> >
|
||||
{
|
||||
typedef CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLDLT> Base;
|
||||
using Base::m_cholmod;
|
||||
|
||||
public:
|
||||
|
||||
typedef _MatrixType MatrixType;
|
||||
|
||||
CholmodSimplicialLDLT() : Base() { init(); }
|
||||
|
||||
CholmodSimplicialLDLT(const MatrixType& matrix) : Base()
|
||||
{
|
||||
init();
|
||||
this->compute(matrix);
|
||||
}
|
||||
|
||||
~CholmodSimplicialLDLT() {}
|
||||
protected:
|
||||
void init()
|
||||
{
|
||||
m_cholmod.final_asis = 1;
|
||||
m_cholmod.supernodal = CHOLMOD_SIMPLICIAL;
|
||||
}
|
||||
};
|
||||
|
||||
/** \ingroup CholmodSupport_Module
|
||||
* \class CholmodSupernodalLLT
|
||||
* \brief A supernodal Cholesky (LLT) factorization and solver based on Cholmod
|
||||
*
|
||||
* This class allows to solve for A.X = B sparse linear problems via a supernodal LL^T Cholesky factorization
|
||||
* using the Cholmod library.
|
||||
* This supernodal variant performs best on dense enough problems, e.g., 3D FEM, or very high order 2D FEM.
|
||||
* The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices
|
||||
* X and B can be either dense or sparse.
|
||||
*
|
||||
* \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
|
||||
* \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
|
||||
* or Upper. Default is Lower.
|
||||
*
|
||||
* \implsparsesolverconcept
|
||||
*
|
||||
* This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
|
||||
*
|
||||
* \warning Only double precision real and complex scalar types are supported by Cholmod.
|
||||
*
|
||||
* \sa \ref TutorialSparseSolverConcept
|
||||
*/
|
||||
template<typename _MatrixType, int _UpLo = Lower>
|
||||
class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSupernodalLLT<_MatrixType, _UpLo> >
|
||||
{
|
||||
typedef CholmodBase<_MatrixType, _UpLo, CholmodSupernodalLLT> Base;
|
||||
using Base::m_cholmod;
|
||||
|
||||
public:
|
||||
|
||||
typedef _MatrixType MatrixType;
|
||||
|
||||
CholmodSupernodalLLT() : Base() { init(); }
|
||||
|
||||
CholmodSupernodalLLT(const MatrixType& matrix) : Base()
|
||||
{
|
||||
init();
|
||||
this->compute(matrix);
|
||||
}
|
||||
|
||||
~CholmodSupernodalLLT() {}
|
||||
protected:
|
||||
void init()
|
||||
{
|
||||
m_cholmod.final_asis = 1;
|
||||
m_cholmod.supernodal = CHOLMOD_SUPERNODAL;
|
||||
}
|
||||
};
|
||||
|
||||
/** \ingroup CholmodSupport_Module
|
||||
* \class CholmodDecomposition
|
||||
* \brief A general Cholesky factorization and solver based on Cholmod
|
||||
*
|
||||
* This class allows to solve for A.X = B sparse linear problems via a LL^T or LDL^T Cholesky factorization
|
||||
* using the Cholmod library. The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices
|
||||
* X and B can be either dense or sparse.
|
||||
*
|
||||
* This variant permits to change the underlying Cholesky method at runtime.
|
||||
* On the other hand, it does not provide access to the result of the factorization.
|
||||
* The default is to let Cholmod automatically choose between a simplicial and supernodal factorization.
|
||||
*
|
||||
* \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
|
||||
* \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
|
||||
* or Upper. Default is Lower.
|
||||
*
|
||||
* \implsparsesolverconcept
|
||||
*
|
||||
* This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed.
|
||||
*
|
||||
* \warning Only double precision real and complex scalar types are supported by Cholmod.
|
||||
*
|
||||
* \sa \ref TutorialSparseSolverConcept
|
||||
*/
|
||||
template<typename _MatrixType, int _UpLo = Lower>
|
||||
class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecomposition<_MatrixType, _UpLo> >
|
||||
{
|
||||
typedef CholmodBase<_MatrixType, _UpLo, CholmodDecomposition> Base;
|
||||
using Base::m_cholmod;
|
||||
|
||||
public:
|
||||
|
||||
typedef _MatrixType MatrixType;
|
||||
|
||||
CholmodDecomposition() : Base() { init(); }
|
||||
|
||||
CholmodDecomposition(const MatrixType& matrix) : Base()
|
||||
{
|
||||
init();
|
||||
this->compute(matrix);
|
||||
}
|
||||
|
||||
~CholmodDecomposition() {}
|
||||
|
||||
void setMode(CholmodMode mode)
|
||||
{
|
||||
switch(mode)
|
||||
{
|
||||
case CholmodAuto:
|
||||
m_cholmod.final_asis = 1;
|
||||
m_cholmod.supernodal = CHOLMOD_AUTO;
|
||||
break;
|
||||
case CholmodSimplicialLLt:
|
||||
m_cholmod.final_asis = 0;
|
||||
m_cholmod.supernodal = CHOLMOD_SIMPLICIAL;
|
||||
m_cholmod.final_ll = 1;
|
||||
break;
|
||||
case CholmodSupernodalLLt:
|
||||
m_cholmod.final_asis = 1;
|
||||
m_cholmod.supernodal = CHOLMOD_SUPERNODAL;
|
||||
break;
|
||||
case CholmodLDLt:
|
||||
m_cholmod.final_asis = 1;
|
||||
m_cholmod.supernodal = CHOLMOD_SIMPLICIAL;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
protected:
|
||||
void init()
|
||||
{
|
||||
m_cholmod.final_asis = 1;
|
||||
m_cholmod.supernodal = CHOLMOD_AUTO;
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_CHOLMODSUPPORT_H
|
||||
329
external/include/eigen3/Eigen/src/Core/Array.h
vendored
Normal file
329
external/include/eigen3/Eigen/src/Core/Array.h
vendored
Normal file
@@ -0,0 +1,329 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_ARRAY_H
|
||||
#define EIGEN_ARRAY_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
struct traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
|
||||
{
|
||||
typedef ArrayXpr XprKind;
|
||||
typedef ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > XprBase;
|
||||
};
|
||||
}
|
||||
|
||||
/** \class Array
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief General-purpose arrays with easy API for coefficient-wise operations
|
||||
*
|
||||
* The %Array class is very similar to the Matrix class. It provides
|
||||
* general-purpose one- and two-dimensional arrays. The difference between the
|
||||
* %Array and the %Matrix class is primarily in the API: the API for the
|
||||
* %Array class provides easy access to coefficient-wise operations, while the
|
||||
* API for the %Matrix class provides easy access to linear-algebra
|
||||
* operations.
|
||||
*
|
||||
* See documentation of class Matrix for detailed information on the template parameters
|
||||
* storage layout.
|
||||
*
|
||||
* This class can be extended with the help of the plugin mechanism described on the page
|
||||
* \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN.
|
||||
*
|
||||
* \sa \blank \ref TutorialArrayClass, \ref TopicClassHierarchy
|
||||
*/
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
class Array
|
||||
: public PlainObjectBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
|
||||
{
|
||||
public:
|
||||
|
||||
typedef PlainObjectBase<Array> Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Array)
|
||||
|
||||
enum { Options = _Options };
|
||||
typedef typename Base::PlainObject PlainObject;
|
||||
|
||||
protected:
|
||||
template <typename Derived, typename OtherDerived, bool IsVector>
|
||||
friend struct internal::conservative_resize_like_impl;
|
||||
|
||||
using Base::m_storage;
|
||||
|
||||
public:
|
||||
|
||||
using Base::base;
|
||||
using Base::coeff;
|
||||
using Base::coeffRef;
|
||||
|
||||
/**
|
||||
* The usage of
|
||||
* using Base::operator=;
|
||||
* fails on MSVC. Since the code below is working with GCC and MSVC, we skipped
|
||||
* the usage of 'using'. This should be done only for operator=.
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array& operator=(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
return Base::operator=(other);
|
||||
}
|
||||
|
||||
/** Set all the entries to \a value.
|
||||
* \sa DenseBase::setConstant(), DenseBase::fill()
|
||||
*/
|
||||
/* This overload is needed because the usage of
|
||||
* using Base::operator=;
|
||||
* fails on MSVC. Since the code below is working with GCC and MSVC, we skipped
|
||||
* the usage of 'using'. This should be done only for operator=.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array& operator=(const Scalar &value)
|
||||
{
|
||||
Base::setConstant(value);
|
||||
return *this;
|
||||
}
|
||||
|
||||
/** Copies the value of the expression \a other into \c *this with automatic resizing.
|
||||
*
|
||||
* *this might be resized to match the dimensions of \a other. If *this was a null matrix (not already initialized),
|
||||
* it will be initialized.
|
||||
*
|
||||
* Note that copying a row-vector into a vector (and conversely) is allowed.
|
||||
* The resizing, if any, is then done in the appropriate way so that row-vectors
|
||||
* remain row-vectors and vectors remain vectors.
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array& operator=(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
return Base::_set(other);
|
||||
}
|
||||
|
||||
/** This is a special case of the templated operator=. Its purpose is to
|
||||
* prevent a default operator= from hiding the templated operator=.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array& operator=(const Array& other)
|
||||
{
|
||||
return Base::_set(other);
|
||||
}
|
||||
|
||||
/** Default constructor.
|
||||
*
|
||||
* For fixed-size matrices, does nothing.
|
||||
*
|
||||
* For dynamic-size matrices, creates an empty matrix of size 0. Does not allocate any array. Such a matrix
|
||||
* is called a null matrix. This constructor is the unique way to create null matrices: resizing
|
||||
* a matrix to 0 is not supported.
|
||||
*
|
||||
* \sa resize(Index,Index)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array() : Base()
|
||||
{
|
||||
Base::_check_template_params();
|
||||
EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
|
||||
}
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
// FIXME is it still needed ??
|
||||
/** \internal */
|
||||
EIGEN_DEVICE_FUNC
|
||||
Array(internal::constructor_without_unaligned_array_assert)
|
||||
: Base(internal::constructor_without_unaligned_array_assert())
|
||||
{
|
||||
Base::_check_template_params();
|
||||
EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
|
||||
}
|
||||
#endif
|
||||
|
||||
#if EIGEN_HAS_RVALUE_REFERENCES
|
||||
EIGEN_DEVICE_FUNC
|
||||
Array(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)
|
||||
: Base(std::move(other))
|
||||
{
|
||||
Base::_check_template_params();
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)
|
||||
{
|
||||
other.swap(*this);
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE explicit Array(const T& x)
|
||||
{
|
||||
Base::_check_template_params();
|
||||
Base::template _init1<T>(x);
|
||||
}
|
||||
|
||||
template<typename T0, typename T1>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array(const T0& val0, const T1& val1)
|
||||
{
|
||||
Base::_check_template_params();
|
||||
this->template _init2<T0,T1>(val0, val1);
|
||||
}
|
||||
#else
|
||||
/** \brief Constructs a fixed-sized array initialized with coefficients starting at \a data */
|
||||
EIGEN_DEVICE_FUNC explicit Array(const Scalar *data);
|
||||
/** Constructs a vector or row-vector with given dimension. \only_for_vectors
|
||||
*
|
||||
* Note that this is only useful for dynamic-size vectors. For fixed-size vectors,
|
||||
* it is redundant to pass the dimension here, so it makes more sense to use the default
|
||||
* constructor Array() instead.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE explicit Array(Index dim);
|
||||
/** constructs an initialized 1x1 Array with the given coefficient */
|
||||
Array(const Scalar& value);
|
||||
/** constructs an uninitialized array with \a rows rows and \a cols columns.
|
||||
*
|
||||
* This is useful for dynamic-size arrays. For fixed-size arrays,
|
||||
* it is redundant to pass these parameters, so one should use the default constructor
|
||||
* Array() instead. */
|
||||
Array(Index rows, Index cols);
|
||||
/** constructs an initialized 2D vector with given coefficients */
|
||||
Array(const Scalar& val0, const Scalar& val1);
|
||||
#endif
|
||||
|
||||
/** constructs an initialized 3D vector with given coefficients */
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2)
|
||||
{
|
||||
Base::_check_template_params();
|
||||
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Array, 3)
|
||||
m_storage.data()[0] = val0;
|
||||
m_storage.data()[1] = val1;
|
||||
m_storage.data()[2] = val2;
|
||||
}
|
||||
/** constructs an initialized 4D vector with given coefficients */
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2, const Scalar& val3)
|
||||
{
|
||||
Base::_check_template_params();
|
||||
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Array, 4)
|
||||
m_storage.data()[0] = val0;
|
||||
m_storage.data()[1] = val1;
|
||||
m_storage.data()[2] = val2;
|
||||
m_storage.data()[3] = val3;
|
||||
}
|
||||
|
||||
/** Copy constructor */
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array(const Array& other)
|
||||
: Base(other)
|
||||
{ }
|
||||
|
||||
private:
|
||||
struct PrivateType {};
|
||||
public:
|
||||
|
||||
/** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other,
|
||||
typename internal::enable_if<internal::is_convertible<typename OtherDerived::Scalar,Scalar>::value,
|
||||
PrivateType>::type = PrivateType())
|
||||
: Base(other.derived())
|
||||
{ }
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
|
||||
EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
|
||||
|
||||
#ifdef EIGEN_ARRAY_PLUGIN
|
||||
#include EIGEN_ARRAY_PLUGIN
|
||||
#endif
|
||||
|
||||
private:
|
||||
|
||||
template<typename MatrixType, typename OtherDerived, bool SwapPointers>
|
||||
friend struct internal::matrix_swap_impl;
|
||||
};
|
||||
|
||||
/** \defgroup arraytypedefs Global array typedefs
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* Eigen defines several typedef shortcuts for most common 1D and 2D array types.
|
||||
*
|
||||
* The general patterns are the following:
|
||||
*
|
||||
* \c ArrayRowsColsType where \c Rows and \c Cols can be \c 2,\c 3,\c 4 for fixed size square matrices or \c X for dynamic size,
|
||||
* and where \c Type can be \c i for integer, \c f for float, \c d for double, \c cf for complex float, \c cd
|
||||
* for complex double.
|
||||
*
|
||||
* For example, \c Array33d is a fixed-size 3x3 array type of doubles, and \c ArrayXXf is a dynamic-size matrix of floats.
|
||||
*
|
||||
* There are also \c ArraySizeType which are self-explanatory. For example, \c Array4cf is
|
||||
* a fixed-size 1D array of 4 complex floats.
|
||||
*
|
||||
* \sa class Array
|
||||
*/
|
||||
|
||||
#define EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix) \
|
||||
/** \ingroup arraytypedefs */ \
|
||||
typedef Array<Type, Size, Size> Array##SizeSuffix##SizeSuffix##TypeSuffix; \
|
||||
/** \ingroup arraytypedefs */ \
|
||||
typedef Array<Type, Size, 1> Array##SizeSuffix##TypeSuffix;
|
||||
|
||||
#define EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, Size) \
|
||||
/** \ingroup arraytypedefs */ \
|
||||
typedef Array<Type, Size, Dynamic> Array##Size##X##TypeSuffix; \
|
||||
/** \ingroup arraytypedefs */ \
|
||||
typedef Array<Type, Dynamic, Size> Array##X##Size##TypeSuffix;
|
||||
|
||||
#define EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \
|
||||
EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 2, 2) \
|
||||
EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 3, 3) \
|
||||
EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 4, 4) \
|
||||
EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, Dynamic, X) \
|
||||
EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 2) \
|
||||
EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 3) \
|
||||
EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 4)
|
||||
|
||||
EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(int, i)
|
||||
EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(float, f)
|
||||
EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(double, d)
|
||||
EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(std::complex<float>, cf)
|
||||
EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(std::complex<double>, cd)
|
||||
|
||||
#undef EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES
|
||||
#undef EIGEN_MAKE_ARRAY_TYPEDEFS
|
||||
|
||||
#undef EIGEN_MAKE_ARRAY_TYPEDEFS_LARGE
|
||||
|
||||
#define EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \
|
||||
using Eigen::Matrix##SizeSuffix##TypeSuffix; \
|
||||
using Eigen::Vector##SizeSuffix##TypeSuffix; \
|
||||
using Eigen::RowVector##SizeSuffix##TypeSuffix;
|
||||
|
||||
#define EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(TypeSuffix) \
|
||||
EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \
|
||||
EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \
|
||||
EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \
|
||||
EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X) \
|
||||
|
||||
#define EIGEN_USING_ARRAY_TYPEDEFS \
|
||||
EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(i) \
|
||||
EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(f) \
|
||||
EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(d) \
|
||||
EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(cf) \
|
||||
EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(cd)
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_ARRAY_H
|
||||
226
external/include/eigen3/Eigen/src/Core/ArrayBase.h
vendored
Normal file
226
external/include/eigen3/Eigen/src/Core/ArrayBase.h
vendored
Normal file
@@ -0,0 +1,226 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_ARRAYBASE_H
|
||||
#define EIGEN_ARRAYBASE_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
template<typename ExpressionType> class MatrixWrapper;
|
||||
|
||||
/** \class ArrayBase
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Base class for all 1D and 2D array, and related expressions
|
||||
*
|
||||
* An array is similar to a dense vector or matrix. While matrices are mathematical
|
||||
* objects with well defined linear algebra operators, an array is just a collection
|
||||
* of scalar values arranged in a one or two dimensionnal fashion. As the main consequence,
|
||||
* all operations applied to an array are performed coefficient wise. Furthermore,
|
||||
* arrays support scalar math functions of the c++ standard library (e.g., std::sin(x)), and convenient
|
||||
* constructors allowing to easily write generic code working for both scalar values
|
||||
* and arrays.
|
||||
*
|
||||
* This class is the base that is inherited by all array expression types.
|
||||
*
|
||||
* \tparam Derived is the derived type, e.g., an array or an expression type.
|
||||
*
|
||||
* This class can be extended with the help of the plugin mechanism described on the page
|
||||
* \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_ARRAYBASE_PLUGIN.
|
||||
*
|
||||
* \sa class MatrixBase, \ref TopicClassHierarchy
|
||||
*/
|
||||
template<typename Derived> class ArrayBase
|
||||
: public DenseBase<Derived>
|
||||
{
|
||||
public:
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** The base class for a given storage type. */
|
||||
typedef ArrayBase StorageBaseType;
|
||||
|
||||
typedef ArrayBase Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl;
|
||||
|
||||
typedef typename internal::traits<Derived>::StorageKind StorageKind;
|
||||
typedef typename internal::traits<Derived>::Scalar Scalar;
|
||||
typedef typename internal::packet_traits<Scalar>::type PacketScalar;
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
|
||||
typedef DenseBase<Derived> Base;
|
||||
using Base::RowsAtCompileTime;
|
||||
using Base::ColsAtCompileTime;
|
||||
using Base::SizeAtCompileTime;
|
||||
using Base::MaxRowsAtCompileTime;
|
||||
using Base::MaxColsAtCompileTime;
|
||||
using Base::MaxSizeAtCompileTime;
|
||||
using Base::IsVectorAtCompileTime;
|
||||
using Base::Flags;
|
||||
|
||||
using Base::derived;
|
||||
using Base::const_cast_derived;
|
||||
using Base::rows;
|
||||
using Base::cols;
|
||||
using Base::size;
|
||||
using Base::coeff;
|
||||
using Base::coeffRef;
|
||||
using Base::lazyAssign;
|
||||
using Base::operator=;
|
||||
using Base::operator+=;
|
||||
using Base::operator-=;
|
||||
using Base::operator*=;
|
||||
using Base::operator/=;
|
||||
|
||||
typedef typename Base::CoeffReturnType CoeffReturnType;
|
||||
|
||||
#endif // not EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
typedef typename Base::PlainObject PlainObject;
|
||||
|
||||
/** \internal Represents a matrix with all coefficients equal to one another*/
|
||||
typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
|
||||
#endif // not EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase
|
||||
#define EIGEN_DOC_UNARY_ADDONS(X,Y)
|
||||
# include "../plugins/CommonCwiseUnaryOps.h"
|
||||
# include "../plugins/MatrixCwiseUnaryOps.h"
|
||||
# include "../plugins/ArrayCwiseUnaryOps.h"
|
||||
# include "../plugins/CommonCwiseBinaryOps.h"
|
||||
# include "../plugins/MatrixCwiseBinaryOps.h"
|
||||
# include "../plugins/ArrayCwiseBinaryOps.h"
|
||||
# ifdef EIGEN_ARRAYBASE_PLUGIN
|
||||
# include EIGEN_ARRAYBASE_PLUGIN
|
||||
# endif
|
||||
#undef EIGEN_CURRENT_STORAGE_BASE_CLASS
|
||||
#undef EIGEN_DOC_UNARY_ADDONS
|
||||
|
||||
/** Special case of the template operator=, in order to prevent the compiler
|
||||
* from generating a default operator= (issue hit with g++ 4.1)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator=(const ArrayBase& other)
|
||||
{
|
||||
internal::call_assignment(derived(), other.derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** Set all the entries to \a value.
|
||||
* \sa DenseBase::setConstant(), DenseBase::fill() */
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator=(const Scalar &value)
|
||||
{ Base::setConstant(value); return derived(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator+=(const Scalar& scalar);
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator-=(const Scalar& scalar);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator+=(const ArrayBase<OtherDerived>& other);
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator-=(const ArrayBase<OtherDerived>& other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator*=(const ArrayBase<OtherDerived>& other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator/=(const ArrayBase<OtherDerived>& other);
|
||||
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC
|
||||
ArrayBase<Derived>& array() { return *this; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
const ArrayBase<Derived>& array() const { return *this; }
|
||||
|
||||
/** \returns an \link Eigen::MatrixBase Matrix \endlink expression of this array
|
||||
* \sa MatrixBase::array() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
MatrixWrapper<Derived> matrix() { return MatrixWrapper<Derived>(derived()); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
const MatrixWrapper<const Derived> matrix() const { return MatrixWrapper<const Derived>(derived()); }
|
||||
|
||||
// template<typename Dest>
|
||||
// inline void evalTo(Dest& dst) const { dst = matrix(); }
|
||||
|
||||
protected:
|
||||
EIGEN_DEVICE_FUNC
|
||||
ArrayBase() : Base() {}
|
||||
|
||||
private:
|
||||
explicit ArrayBase(Index);
|
||||
ArrayBase(Index,Index);
|
||||
template<typename OtherDerived> explicit ArrayBase(const ArrayBase<OtherDerived>&);
|
||||
protected:
|
||||
// mixing arrays and matrices is not legal
|
||||
template<typename OtherDerived> Derived& operator+=(const MatrixBase<OtherDerived>& )
|
||||
{EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
|
||||
// mixing arrays and matrices is not legal
|
||||
template<typename OtherDerived> Derived& operator-=(const MatrixBase<OtherDerived>& )
|
||||
{EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
|
||||
};
|
||||
|
||||
/** replaces \c *this by \c *this - \a other.
|
||||
*
|
||||
* \returns a reference to \c *this
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
|
||||
ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)
|
||||
{
|
||||
call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** replaces \c *this by \c *this + \a other.
|
||||
*
|
||||
* \returns a reference to \c *this
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
|
||||
ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)
|
||||
{
|
||||
call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** replaces \c *this by \c *this * \a other coefficient wise.
|
||||
*
|
||||
* \returns a reference to \c *this
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
|
||||
ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other)
|
||||
{
|
||||
call_assignment(derived(), other.derived(), internal::mul_assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** replaces \c *this by \c *this / \a other coefficient wise.
|
||||
*
|
||||
* \returns a reference to \c *this
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
|
||||
ArrayBase<Derived>::operator/=(const ArrayBase<OtherDerived>& other)
|
||||
{
|
||||
call_assignment(derived(), other.derived(), internal::div_assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
return derived();
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_ARRAYBASE_H
|
||||
209
external/include/eigen3/Eigen/src/Core/ArrayWrapper.h
vendored
Normal file
209
external/include/eigen3/Eigen/src/Core/ArrayWrapper.h
vendored
Normal file
@@ -0,0 +1,209 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_ARRAYWRAPPER_H
|
||||
#define EIGEN_ARRAYWRAPPER_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class ArrayWrapper
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of a mathematical vector or matrix as an array object
|
||||
*
|
||||
* This class is the return type of MatrixBase::array(), and most of the time
|
||||
* this is the only way it is use.
|
||||
*
|
||||
* \sa MatrixBase::array(), class MatrixWrapper
|
||||
*/
|
||||
|
||||
namespace internal {
|
||||
template<typename ExpressionType>
|
||||
struct traits<ArrayWrapper<ExpressionType> >
|
||||
: public traits<typename remove_all<typename ExpressionType::Nested>::type >
|
||||
{
|
||||
typedef ArrayXpr XprKind;
|
||||
// Let's remove NestByRefBit
|
||||
enum {
|
||||
Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,
|
||||
LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0,
|
||||
Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
template<typename ExpressionType>
|
||||
class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
|
||||
{
|
||||
public:
|
||||
typedef ArrayBase<ArrayWrapper> Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(ArrayWrapper)
|
||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ArrayWrapper)
|
||||
typedef typename internal::remove_all<ExpressionType>::type NestedExpression;
|
||||
|
||||
typedef typename internal::conditional<
|
||||
internal::is_lvalue<ExpressionType>::value,
|
||||
Scalar,
|
||||
const Scalar
|
||||
>::type ScalarWithConstIfNotLvalue;
|
||||
|
||||
typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;
|
||||
|
||||
using Base::coeffRef;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rows() const { return m_expression.rows(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index cols() const { return m_expression.cols(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const { return m_expression.outerStride(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const { return m_expression.innerStride(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar* data() const { return m_expression.data(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index rowId, Index colId) const
|
||||
{
|
||||
return m_expression.coeffRef(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index index) const
|
||||
{
|
||||
return m_expression.coeffRef(index);
|
||||
}
|
||||
|
||||
template<typename Dest>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void evalTo(Dest& dst) const { dst = m_expression; }
|
||||
|
||||
const typename internal::remove_all<NestedExpressionType>::type&
|
||||
EIGEN_DEVICE_FUNC
|
||||
nestedExpression() const
|
||||
{
|
||||
return m_expression;
|
||||
}
|
||||
|
||||
/** Forwards the resizing request to the nested expression
|
||||
* \sa DenseBase::resize(Index) */
|
||||
EIGEN_DEVICE_FUNC
|
||||
void resize(Index newSize) { m_expression.resize(newSize); }
|
||||
/** Forwards the resizing request to the nested expression
|
||||
* \sa DenseBase::resize(Index,Index)*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
void resize(Index rows, Index cols) { m_expression.resize(rows,cols); }
|
||||
|
||||
protected:
|
||||
NestedExpressionType m_expression;
|
||||
};
|
||||
|
||||
/** \class MatrixWrapper
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of an array as a mathematical vector or matrix
|
||||
*
|
||||
* This class is the return type of ArrayBase::matrix(), and most of the time
|
||||
* this is the only way it is use.
|
||||
*
|
||||
* \sa MatrixBase::matrix(), class ArrayWrapper
|
||||
*/
|
||||
|
||||
namespace internal {
|
||||
template<typename ExpressionType>
|
||||
struct traits<MatrixWrapper<ExpressionType> >
|
||||
: public traits<typename remove_all<typename ExpressionType::Nested>::type >
|
||||
{
|
||||
typedef MatrixXpr XprKind;
|
||||
// Let's remove NestByRefBit
|
||||
enum {
|
||||
Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,
|
||||
LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0,
|
||||
Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
template<typename ExpressionType>
|
||||
class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
|
||||
{
|
||||
public:
|
||||
typedef MatrixBase<MatrixWrapper<ExpressionType> > Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(MatrixWrapper)
|
||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(MatrixWrapper)
|
||||
typedef typename internal::remove_all<ExpressionType>::type NestedExpression;
|
||||
|
||||
typedef typename internal::conditional<
|
||||
internal::is_lvalue<ExpressionType>::value,
|
||||
Scalar,
|
||||
const Scalar
|
||||
>::type ScalarWithConstIfNotLvalue;
|
||||
|
||||
typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;
|
||||
|
||||
using Base::coeffRef;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rows() const { return m_expression.rows(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index cols() const { return m_expression.cols(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const { return m_expression.outerStride(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const { return m_expression.innerStride(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar* data() const { return m_expression.data(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index rowId, Index colId) const
|
||||
{
|
||||
return m_expression.derived().coeffRef(rowId, colId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index index) const
|
||||
{
|
||||
return m_expression.coeffRef(index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<NestedExpressionType>::type&
|
||||
nestedExpression() const
|
||||
{
|
||||
return m_expression;
|
||||
}
|
||||
|
||||
/** Forwards the resizing request to the nested expression
|
||||
* \sa DenseBase::resize(Index) */
|
||||
EIGEN_DEVICE_FUNC
|
||||
void resize(Index newSize) { m_expression.resize(newSize); }
|
||||
/** Forwards the resizing request to the nested expression
|
||||
* \sa DenseBase::resize(Index,Index)*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
void resize(Index rows, Index cols) { m_expression.resize(rows,cols); }
|
||||
|
||||
protected:
|
||||
NestedExpressionType m_expression;
|
||||
};
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_ARRAYWRAPPER_H
|
||||
90
external/include/eigen3/Eigen/src/Core/Assign.h
vendored
Normal file
90
external/include/eigen3/Eigen/src/Core/Assign.h
vendored
Normal file
@@ -0,0 +1,90 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2007 Michael Olbrich <michael.olbrich@gmx.net>
|
||||
// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_ASSIGN_H
|
||||
#define EIGEN_ASSIGN_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
|
||||
::lazyAssign(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
enum{
|
||||
SameType = internal::is_same<typename Derived::Scalar,typename OtherDerived::Scalar>::value
|
||||
};
|
||||
|
||||
EIGEN_STATIC_ASSERT_LVALUE(Derived)
|
||||
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived)
|
||||
EIGEN_STATIC_ASSERT(SameType,YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
|
||||
|
||||
eigen_assert(rows() == other.rows() && cols() == other.cols());
|
||||
internal::call_assignment_no_alias(derived(),other.derived());
|
||||
|
||||
return derived();
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
internal::call_assignment(derived(), other.derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase& other)
|
||||
{
|
||||
internal::call_assignment(derived(), other.derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const MatrixBase& other)
|
||||
{
|
||||
internal::call_assignment(derived(), other.derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
template <typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
internal::call_assignment(derived(), other.derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
template <typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const EigenBase<OtherDerived>& other)
|
||||
{
|
||||
internal::call_assignment(derived(), other.derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
|
||||
{
|
||||
other.derived().evalTo(derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_ASSIGN_H
|
||||
935
external/include/eigen3/Eigen/src/Core/AssignEvaluator.h
vendored
Normal file
935
external/include/eigen3/Eigen/src/Core/AssignEvaluator.h
vendored
Normal file
@@ -0,0 +1,935 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_ASSIGN_EVALUATOR_H
|
||||
#define EIGEN_ASSIGN_EVALUATOR_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
// This implementation is based on Assign.h
|
||||
|
||||
namespace internal {
|
||||
|
||||
/***************************************************************************
|
||||
* Part 1 : the logic deciding a strategy for traversal and unrolling *
|
||||
***************************************************************************/
|
||||
|
||||
// copy_using_evaluator_traits is based on assign_traits
|
||||
|
||||
template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc>
|
||||
struct copy_using_evaluator_traits
|
||||
{
|
||||
typedef typename DstEvaluator::XprType Dst;
|
||||
typedef typename Dst::Scalar DstScalar;
|
||||
|
||||
enum {
|
||||
DstFlags = DstEvaluator::Flags,
|
||||
SrcFlags = SrcEvaluator::Flags
|
||||
};
|
||||
|
||||
public:
|
||||
enum {
|
||||
DstAlignment = DstEvaluator::Alignment,
|
||||
SrcAlignment = SrcEvaluator::Alignment,
|
||||
DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit,
|
||||
JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
|
||||
};
|
||||
|
||||
private:
|
||||
enum {
|
||||
InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
|
||||
: int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
|
||||
: int(Dst::RowsAtCompileTime),
|
||||
InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
|
||||
: int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
|
||||
: int(Dst::MaxRowsAtCompileTime),
|
||||
OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
|
||||
MaxSizeAtCompileTime = Dst::SizeAtCompileTime
|
||||
};
|
||||
|
||||
// TODO distinguish between linear traversal and inner-traversals
|
||||
typedef typename find_best_packet<DstScalar,Dst::SizeAtCompileTime>::type LinearPacketType;
|
||||
typedef typename find_best_packet<DstScalar,InnerSize>::type InnerPacketType;
|
||||
|
||||
enum {
|
||||
LinearPacketSize = unpacket_traits<LinearPacketType>::size,
|
||||
InnerPacketSize = unpacket_traits<InnerPacketType>::size
|
||||
};
|
||||
|
||||
public:
|
||||
enum {
|
||||
LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment,
|
||||
InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment
|
||||
};
|
||||
|
||||
private:
|
||||
enum {
|
||||
DstIsRowMajor = DstFlags&RowMajorBit,
|
||||
SrcIsRowMajor = SrcFlags&RowMajorBit,
|
||||
StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
|
||||
MightVectorize = bool(StorageOrdersAgree)
|
||||
&& (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
|
||||
&& bool(functor_traits<AssignFunc>::PacketAccess),
|
||||
MayInnerVectorize = MightVectorize
|
||||
&& int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0
|
||||
&& int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
|
||||
&& (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)),
|
||||
MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
|
||||
MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize) && bool(DstHasDirectAccess)
|
||||
&& (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
|
||||
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
|
||||
so it's only good for large enough sizes. */
|
||||
MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess)
|
||||
&& (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=(EIGEN_UNALIGNED_VECTORIZE?InnerPacketSize:(3*InnerPacketSize)))
|
||||
/* slice vectorization can be slow, so we only want it if the slices are big, which is
|
||||
indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
|
||||
in a fixed-size matrix
|
||||
However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */
|
||||
};
|
||||
|
||||
public:
|
||||
enum {
|
||||
Traversal = int(MayLinearVectorize) && (LinearPacketSize>InnerPacketSize) ? int(LinearVectorizedTraversal)
|
||||
: int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
|
||||
: int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
|
||||
: int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
|
||||
: int(MayLinearize) ? int(LinearTraversal)
|
||||
: int(DefaultTraversal),
|
||||
Vectorized = int(Traversal) == InnerVectorizedTraversal
|
||||
|| int(Traversal) == LinearVectorizedTraversal
|
||||
|| int(Traversal) == SliceVectorizedTraversal
|
||||
};
|
||||
|
||||
typedef typename conditional<int(Traversal)==LinearVectorizedTraversal, LinearPacketType, InnerPacketType>::type PacketType;
|
||||
|
||||
private:
|
||||
enum {
|
||||
ActualPacketSize = int(Traversal)==LinearVectorizedTraversal ? LinearPacketSize
|
||||
: Vectorized ? InnerPacketSize
|
||||
: 1,
|
||||
UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize,
|
||||
MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
|
||||
&& int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit),
|
||||
MayUnrollInner = int(InnerSize) != Dynamic
|
||||
&& int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit)
|
||||
};
|
||||
|
||||
public:
|
||||
enum {
|
||||
Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
|
||||
? (
|
||||
int(MayUnrollCompletely) ? int(CompleteUnrolling)
|
||||
: int(MayUnrollInner) ? int(InnerUnrolling)
|
||||
: int(NoUnrolling)
|
||||
)
|
||||
: int(Traversal) == int(LinearVectorizedTraversal)
|
||||
? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)))
|
||||
? int(CompleteUnrolling)
|
||||
: int(NoUnrolling) )
|
||||
: int(Traversal) == int(LinearTraversal)
|
||||
? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
|
||||
: int(NoUnrolling) )
|
||||
#if EIGEN_UNALIGNED_VECTORIZE
|
||||
: int(Traversal) == int(SliceVectorizedTraversal)
|
||||
? ( bool(MayUnrollInner) ? int(InnerUnrolling)
|
||||
: int(NoUnrolling) )
|
||||
#endif
|
||||
: int(NoUnrolling)
|
||||
};
|
||||
|
||||
#ifdef EIGEN_DEBUG_ASSIGN
|
||||
static void debug()
|
||||
{
|
||||
std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl;
|
||||
std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl;
|
||||
std::cerr.setf(std::ios::hex, std::ios::basefield);
|
||||
std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl;
|
||||
std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl;
|
||||
std::cerr.unsetf(std::ios::hex);
|
||||
EIGEN_DEBUG_VAR(DstAlignment)
|
||||
EIGEN_DEBUG_VAR(SrcAlignment)
|
||||
EIGEN_DEBUG_VAR(LinearRequiredAlignment)
|
||||
EIGEN_DEBUG_VAR(InnerRequiredAlignment)
|
||||
EIGEN_DEBUG_VAR(JointAlignment)
|
||||
EIGEN_DEBUG_VAR(InnerSize)
|
||||
EIGEN_DEBUG_VAR(InnerMaxSize)
|
||||
EIGEN_DEBUG_VAR(LinearPacketSize)
|
||||
EIGEN_DEBUG_VAR(InnerPacketSize)
|
||||
EIGEN_DEBUG_VAR(ActualPacketSize)
|
||||
EIGEN_DEBUG_VAR(StorageOrdersAgree)
|
||||
EIGEN_DEBUG_VAR(MightVectorize)
|
||||
EIGEN_DEBUG_VAR(MayLinearize)
|
||||
EIGEN_DEBUG_VAR(MayInnerVectorize)
|
||||
EIGEN_DEBUG_VAR(MayLinearVectorize)
|
||||
EIGEN_DEBUG_VAR(MaySliceVectorize)
|
||||
std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
|
||||
EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost)
|
||||
EIGEN_DEBUG_VAR(UnrollingLimit)
|
||||
EIGEN_DEBUG_VAR(MayUnrollCompletely)
|
||||
EIGEN_DEBUG_VAR(MayUnrollInner)
|
||||
std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
|
||||
std::cerr << std::endl;
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
* Part 2 : meta-unrollers
|
||||
***************************************************************************/
|
||||
|
||||
/************************
|
||||
*** Default traversal ***
|
||||
************************/
|
||||
|
||||
template<typename Kernel, int Index, int Stop>
|
||||
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
|
||||
{
|
||||
// FIXME: this is not very clean, perhaps this information should be provided by the kernel?
|
||||
typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
|
||||
typedef typename DstEvaluatorType::XprType DstXprType;
|
||||
|
||||
enum {
|
||||
outer = Index / DstXprType::InnerSizeAtCompileTime,
|
||||
inner = Index % DstXprType::InnerSizeAtCompileTime
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
kernel.assignCoeffByOuterInner(outer, inner);
|
||||
copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel, int Stop>
|
||||
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
|
||||
};
|
||||
|
||||
template<typename Kernel, int Index_, int Stop>
|
||||
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
|
||||
{
|
||||
kernel.assignCoeffByOuterInner(outer, Index_);
|
||||
copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_+1, Stop>::run(kernel, outer);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel, int Stop>
|
||||
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { }
|
||||
};
|
||||
|
||||
/***********************
|
||||
*** Linear traversal ***
|
||||
***********************/
|
||||
|
||||
template<typename Kernel, int Index, int Stop>
|
||||
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel)
|
||||
{
|
||||
kernel.assignCoeff(Index);
|
||||
copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel, int Stop>
|
||||
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
|
||||
};
|
||||
|
||||
/**************************
|
||||
*** Inner vectorization ***
|
||||
**************************/
|
||||
|
||||
template<typename Kernel, int Index, int Stop>
|
||||
struct copy_using_evaluator_innervec_CompleteUnrolling
|
||||
{
|
||||
// FIXME: this is not very clean, perhaps this information should be provided by the kernel?
|
||||
typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
|
||||
typedef typename DstEvaluatorType::XprType DstXprType;
|
||||
typedef typename Kernel::PacketType PacketType;
|
||||
|
||||
enum {
|
||||
outer = Index / DstXprType::InnerSizeAtCompileTime,
|
||||
inner = Index % DstXprType::InnerSizeAtCompileTime,
|
||||
SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
|
||||
DstAlignment = Kernel::AssignmentTraits::DstAlignment
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
|
||||
enum { NextIndex = Index + unpacket_traits<PacketType>::size };
|
||||
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel, int Stop>
|
||||
struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
|
||||
};
|
||||
|
||||
template<typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
|
||||
struct copy_using_evaluator_innervec_InnerUnrolling
|
||||
{
|
||||
typedef typename Kernel::PacketType PacketType;
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
|
||||
{
|
||||
kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
|
||||
enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
|
||||
copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel, outer);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
|
||||
struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
* Part 3 : implementation of all cases
|
||||
***************************************************************************/
|
||||
|
||||
// dense_assignment_loop is based on assign_impl
|
||||
|
||||
template<typename Kernel,
|
||||
int Traversal = Kernel::AssignmentTraits::Traversal,
|
||||
int Unrolling = Kernel::AssignmentTraits::Unrolling>
|
||||
struct dense_assignment_loop;
|
||||
|
||||
/************************
|
||||
*** Default traversal ***
|
||||
************************/
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel)
|
||||
{
|
||||
for(Index outer = 0; outer < kernel.outerSize(); ++outer) {
|
||||
for(Index inner = 0; inner < kernel.innerSize(); ++inner) {
|
||||
kernel.assignCoeffByOuterInner(outer, inner);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
||||
copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
||||
|
||||
const Index outerSize = kernel.outerSize();
|
||||
for(Index outer = 0; outer < outerSize; ++outer)
|
||||
copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
|
||||
}
|
||||
};
|
||||
|
||||
/***************************
|
||||
*** Linear vectorization ***
|
||||
***************************/
|
||||
|
||||
|
||||
// The goal of unaligned_dense_assignment_loop is simply to factorize the handling
|
||||
// of the non vectorizable beginning and ending parts
|
||||
|
||||
template <bool IsAligned = false>
|
||||
struct unaligned_dense_assignment_loop
|
||||
{
|
||||
// if IsAligned = true, then do nothing
|
||||
template <typename Kernel>
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct unaligned_dense_assignment_loop<false>
|
||||
{
|
||||
// MSVC must not inline this functions. If it does, it fails to optimize the
|
||||
// packet access path.
|
||||
// FIXME check which version exhibits this issue
|
||||
#if EIGEN_COMP_MSVC
|
||||
template <typename Kernel>
|
||||
static EIGEN_DONT_INLINE void run(Kernel &kernel,
|
||||
Index start,
|
||||
Index end)
|
||||
#else
|
||||
template <typename Kernel>
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel,
|
||||
Index start,
|
||||
Index end)
|
||||
#endif
|
||||
{
|
||||
for (Index index = start; index < end; ++index)
|
||||
kernel.assignCoeff(index);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
const Index size = kernel.size();
|
||||
typedef typename Kernel::Scalar Scalar;
|
||||
typedef typename Kernel::PacketType PacketType;
|
||||
enum {
|
||||
requestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment,
|
||||
packetSize = unpacket_traits<PacketType>::size,
|
||||
dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
|
||||
dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment)
|
||||
: int(Kernel::AssignmentTraits::DstAlignment),
|
||||
srcAlignment = Kernel::AssignmentTraits::JointAlignment
|
||||
};
|
||||
const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(kernel.dstDataPtr(), size);
|
||||
const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
|
||||
|
||||
unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);
|
||||
|
||||
for(Index index = alignedStart; index < alignedEnd; index += packetSize)
|
||||
kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index);
|
||||
|
||||
unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
||||
typedef typename Kernel::PacketType PacketType;
|
||||
|
||||
enum { size = DstXprType::SizeAtCompileTime,
|
||||
packetSize =unpacket_traits<PacketType>::size,
|
||||
alignedSize = (size/packetSize)*packetSize };
|
||||
|
||||
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
|
||||
copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
|
||||
}
|
||||
};
|
||||
|
||||
/**************************
|
||||
*** Inner vectorization ***
|
||||
**************************/
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
|
||||
{
|
||||
typedef typename Kernel::PacketType PacketType;
|
||||
enum {
|
||||
SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
|
||||
DstAlignment = Kernel::AssignmentTraits::DstAlignment
|
||||
};
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
const Index innerSize = kernel.innerSize();
|
||||
const Index outerSize = kernel.outerSize();
|
||||
const Index packetSize = unpacket_traits<PacketType>::size;
|
||||
for(Index outer = 0; outer < outerSize; ++outer)
|
||||
for(Index inner = 0; inner < innerSize; inner+=packetSize)
|
||||
kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
||||
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
||||
typedef typename Kernel::AssignmentTraits Traits;
|
||||
const Index outerSize = kernel.outerSize();
|
||||
for(Index outer = 0; outer < outerSize; ++outer)
|
||||
copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime,
|
||||
Traits::SrcAlignment, Traits::DstAlignment>::run(kernel, outer);
|
||||
}
|
||||
};
|
||||
|
||||
/***********************
|
||||
*** Linear traversal ***
|
||||
***********************/
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
const Index size = kernel.size();
|
||||
for(Index i = 0; i < size; ++i)
|
||||
kernel.assignCoeff(i);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
||||
copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
|
||||
}
|
||||
};
|
||||
|
||||
/**************************
|
||||
*** Slice vectorization ***
|
||||
***************************/
|
||||
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::Scalar Scalar;
|
||||
typedef typename Kernel::PacketType PacketType;
|
||||
enum {
|
||||
packetSize = unpacket_traits<PacketType>::size,
|
||||
requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment),
|
||||
alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
|
||||
dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
|
||||
dstAlignment = alignable ? int(requestedAlignment)
|
||||
: int(Kernel::AssignmentTraits::DstAlignment)
|
||||
};
|
||||
const Scalar *dst_ptr = kernel.dstDataPtr();
|
||||
if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)
|
||||
{
|
||||
// the pointer is not aligend-on scalar, so alignment is not possible
|
||||
return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
|
||||
}
|
||||
const Index packetAlignedMask = packetSize - 1;
|
||||
const Index innerSize = kernel.innerSize();
|
||||
const Index outerSize = kernel.outerSize();
|
||||
const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
|
||||
Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);
|
||||
|
||||
for(Index outer = 0; outer < outerSize; ++outer)
|
||||
{
|
||||
const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
|
||||
// do the non-vectorizable part of the assignment
|
||||
for(Index inner = 0; inner<alignedStart ; ++inner)
|
||||
kernel.assignCoeffByOuterInner(outer, inner);
|
||||
|
||||
// do the vectorizable part of the assignment
|
||||
for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
|
||||
kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);
|
||||
|
||||
// do the non-vectorizable part of the assignment
|
||||
for(Index inner = alignedEnd; inner<innerSize ; ++inner)
|
||||
kernel.assignCoeffByOuterInner(outer, inner);
|
||||
|
||||
alignedStart = numext::mini((alignedStart+alignedStep)%packetSize, innerSize);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#if EIGEN_UNALIGNED_VECTORIZE
|
||||
template<typename Kernel>
|
||||
struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||
{
|
||||
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
||||
typedef typename Kernel::PacketType PacketType;
|
||||
|
||||
enum { size = DstXprType::InnerSizeAtCompileTime,
|
||||
packetSize =unpacket_traits<PacketType>::size,
|
||||
vectorizableSize = (size/packetSize)*packetSize };
|
||||
|
||||
for(Index outer = 0; outer < kernel.outerSize(); ++outer)
|
||||
{
|
||||
copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer);
|
||||
copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, size>::run(kernel, outer);
|
||||
}
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
* Part 4 : Generic dense assignment kernel
|
||||
***************************************************************************/
|
||||
|
||||
// This class generalize the assignment of a coefficient (or packet) from one dense evaluator
|
||||
// to another dense writable evaluator.
|
||||
// It is parametrized by the two evaluators, and the actual assignment functor.
|
||||
// This abstraction level permits to keep the evaluation loops as simple and as generic as possible.
|
||||
// One can customize the assignment using this generic dense_assignment_kernel with different
|
||||
// functors, or by completely overloading it, by-passing a functor.
|
||||
template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
|
||||
class generic_dense_assignment_kernel
|
||||
{
|
||||
protected:
|
||||
typedef typename DstEvaluatorTypeT::XprType DstXprType;
|
||||
typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
|
||||
public:
|
||||
|
||||
typedef DstEvaluatorTypeT DstEvaluatorType;
|
||||
typedef SrcEvaluatorTypeT SrcEvaluatorType;
|
||||
typedef typename DstEvaluatorType::Scalar Scalar;
|
||||
typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
|
||||
typedef typename AssignmentTraits::PacketType PacketType;
|
||||
|
||||
|
||||
EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
|
||||
: m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
|
||||
{
|
||||
#ifdef EIGEN_DEBUG_ASSIGN
|
||||
AssignmentTraits::debug();
|
||||
#endif
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC Index size() const { return m_dstExpr.size(); }
|
||||
EIGEN_DEVICE_FUNC Index innerSize() const { return m_dstExpr.innerSize(); }
|
||||
EIGEN_DEVICE_FUNC Index outerSize() const { return m_dstExpr.outerSize(); }
|
||||
EIGEN_DEVICE_FUNC Index rows() const { return m_dstExpr.rows(); }
|
||||
EIGEN_DEVICE_FUNC Index cols() const { return m_dstExpr.cols(); }
|
||||
EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; }
|
||||
EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; }
|
||||
|
||||
/// Assign src(row,col) to dst(row,col) through the assignment functor.
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)
|
||||
{
|
||||
m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
|
||||
}
|
||||
|
||||
/// \sa assignCoeff(Index,Index)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index)
|
||||
{
|
||||
m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
|
||||
}
|
||||
|
||||
/// \sa assignCoeff(Index,Index)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner)
|
||||
{
|
||||
Index row = rowIndexByOuterInner(outer, inner);
|
||||
Index col = colIndexByOuterInner(outer, inner);
|
||||
assignCoeff(row, col);
|
||||
}
|
||||
|
||||
|
||||
template<int StoreMode, int LoadMode, typename PacketType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col)
|
||||
{
|
||||
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));
|
||||
}
|
||||
|
||||
template<int StoreMode, int LoadMode, typename PacketType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index)
|
||||
{
|
||||
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index));
|
||||
}
|
||||
|
||||
template<int StoreMode, int LoadMode, typename PacketType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner)
|
||||
{
|
||||
Index row = rowIndexByOuterInner(outer, inner);
|
||||
Index col = colIndexByOuterInner(outer, inner);
|
||||
assignPacket<StoreMode,LoadMode,PacketType>(row, col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)
|
||||
{
|
||||
typedef typename DstEvaluatorType::ExpressionTraits Traits;
|
||||
return int(Traits::RowsAtCompileTime) == 1 ? 0
|
||||
: int(Traits::ColsAtCompileTime) == 1 ? inner
|
||||
: int(DstEvaluatorType::Flags)&RowMajorBit ? outer
|
||||
: inner;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner)
|
||||
{
|
||||
typedef typename DstEvaluatorType::ExpressionTraits Traits;
|
||||
return int(Traits::ColsAtCompileTime) == 1 ? 0
|
||||
: int(Traits::RowsAtCompileTime) == 1 ? inner
|
||||
: int(DstEvaluatorType::Flags)&RowMajorBit ? inner
|
||||
: outer;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const
|
||||
{
|
||||
return m_dstExpr.data();
|
||||
}
|
||||
|
||||
protected:
|
||||
DstEvaluatorType& m_dst;
|
||||
const SrcEvaluatorType& m_src;
|
||||
const Functor &m_functor;
|
||||
// TODO find a way to avoid the needs of the original expression
|
||||
DstXprType& m_dstExpr;
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
* Part 5 : Entry point for dense rectangular assignment
|
||||
***************************************************************************/
|
||||
|
||||
template<typename DstXprType,typename SrcXprType, typename Functor>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const Functor &/*func*/)
|
||||
{
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(dst);
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(src);
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
}
|
||||
|
||||
template<typename DstXprType,typename SrcXprType, typename T1, typename T2>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const internal::assign_op<T1,T2> &/*func*/)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if(((dst.rows()!=dstRows) || (dst.cols()!=dstCols)))
|
||||
dst.resize(dstRows, dstCols);
|
||||
eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols);
|
||||
}
|
||||
|
||||
template<typename DstXprType, typename SrcXprType, typename Functor>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
|
||||
{
|
||||
typedef evaluator<DstXprType> DstEvaluatorType;
|
||||
typedef evaluator<SrcXprType> SrcEvaluatorType;
|
||||
|
||||
SrcEvaluatorType srcEvaluator(src);
|
||||
|
||||
// NOTE To properly handle A = (A*A.transpose())/s with A rectangular,
|
||||
// we need to resize the destination after the source evaluator has been created.
|
||||
resize_if_allowed(dst, src, func);
|
||||
|
||||
DstEvaluatorType dstEvaluator(dst);
|
||||
|
||||
typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
|
||||
Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
|
||||
|
||||
dense_assignment_loop<Kernel>::run(kernel);
|
||||
}
|
||||
|
||||
template<typename DstXprType, typename SrcXprType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src)
|
||||
{
|
||||
call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
|
||||
}
|
||||
|
||||
/***************************************************************************
|
||||
* Part 6 : Generic assignment
|
||||
***************************************************************************/
|
||||
|
||||
// Based on the respective shapes of the destination and source,
|
||||
// the class AssignmentKind determine the kind of assignment mechanism.
|
||||
// AssignmentKind must define a Kind typedef.
|
||||
template<typename DstShape, typename SrcShape> struct AssignmentKind;
|
||||
|
||||
// Assignement kind defined in this file:
|
||||
struct Dense2Dense {};
|
||||
struct EigenBase2EigenBase {};
|
||||
|
||||
template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; };
|
||||
template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; };
|
||||
|
||||
// This is the main assignment class
|
||||
template< typename DstXprType, typename SrcXprType, typename Functor,
|
||||
typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind,
|
||||
typename EnableIf = void>
|
||||
struct Assignment;
|
||||
|
||||
|
||||
// The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic transposition.
|
||||
// Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes thing quite complicated.
|
||||
// So this intermediate function removes everything related to "assume-aliasing" such that Assignment
|
||||
// does not has to bother about these annoying details.
|
||||
|
||||
template<typename Dst, typename Src>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void call_assignment(Dst& dst, const Src& src)
|
||||
{
|
||||
call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
|
||||
}
|
||||
template<typename Dst, typename Src>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void call_assignment(const Dst& dst, const Src& src)
|
||||
{
|
||||
call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
|
||||
}
|
||||
|
||||
// Deal with "assume-aliasing"
|
||||
template<typename Dst, typename Src, typename Func>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0)
|
||||
{
|
||||
typename plain_matrix_type<Src>::type tmp(src);
|
||||
call_assignment_no_alias(dst, tmp, func);
|
||||
}
|
||||
|
||||
template<typename Dst, typename Src, typename Func>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)
|
||||
{
|
||||
call_assignment_no_alias(dst, src, func);
|
||||
}
|
||||
|
||||
// by-pass "assume-aliasing"
|
||||
// When there is no aliasing, we require that 'dst' has been properly resized
|
||||
template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
|
||||
{
|
||||
call_assignment_no_alias(dst.expression(), src, func);
|
||||
}
|
||||
|
||||
|
||||
template<typename Dst, typename Src, typename Func>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
|
||||
{
|
||||
enum {
|
||||
NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
|
||||
|| (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)
|
||||
) && int(Dst::SizeAtCompileTime) != 1
|
||||
};
|
||||
|
||||
typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
|
||||
typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
|
||||
ActualDstType actualDst(dst);
|
||||
|
||||
// TODO check whether this is the right place to perform these checks:
|
||||
EIGEN_STATIC_ASSERT_LVALUE(Dst)
|
||||
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
|
||||
EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);
|
||||
|
||||
Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
|
||||
}
|
||||
template<typename Dst, typename Src>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void call_assignment_no_alias(Dst& dst, const Src& src)
|
||||
{
|
||||
call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
|
||||
}
|
||||
|
||||
template<typename Dst, typename Src, typename Func>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
|
||||
{
|
||||
// TODO check whether this is the right place to perform these checks:
|
||||
EIGEN_STATIC_ASSERT_LVALUE(Dst)
|
||||
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
|
||||
EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);
|
||||
|
||||
Assignment<Dst,Src,Func>::run(dst, src, func);
|
||||
}
|
||||
template<typename Dst, typename Src>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
|
||||
{
|
||||
call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
|
||||
}
|
||||
|
||||
// forward declaration
|
||||
template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);
|
||||
|
||||
// Generic Dense to Dense assignment
|
||||
// Note that the last template argument "Weak" is needed to make it possible to perform
|
||||
// both partial specialization+SFINAE without ambiguous specialization
|
||||
template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
|
||||
struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
|
||||
{
|
||||
#ifndef EIGEN_NO_DEBUG
|
||||
internal::check_for_aliasing(dst, src);
|
||||
#endif
|
||||
|
||||
call_dense_assignment_loop(dst, src, func);
|
||||
}
|
||||
};
|
||||
|
||||
// Generic assignment through evalTo.
|
||||
// TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism.
|
||||
// Note that the last template argument "Weak" is needed to make it possible to perform
|
||||
// both partial specialization+SFINAE without ambiguous specialization
|
||||
template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
|
||||
struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
src.evalTo(dst);
|
||||
}
|
||||
|
||||
// NOTE The following two functions are templated to avoid their instanciation if not needed
|
||||
// This is needed because some expressions supports evalTo only and/or have 'void' as scalar type.
|
||||
template<typename SrcScalarType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
src.addTo(dst);
|
||||
}
|
||||
|
||||
template<typename SrcScalarType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
src.subTo(dst);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_ASSIGN_EVALUATOR_H
|
||||
178
external/include/eigen3/Eigen/src/Core/Assign_MKL.h
vendored
Normal file
178
external/include/eigen3/Eigen/src/Core/Assign_MKL.h
vendored
Normal file
@@ -0,0 +1,178 @@
|
||||
/*
|
||||
Copyright (c) 2011, Intel Corporation. All rights reserved.
|
||||
Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
********************************************************************************
|
||||
* Content : Eigen bindings to Intel(R) MKL
|
||||
* MKL VML support for coefficient-wise unary Eigen expressions like a=b.sin()
|
||||
********************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef EIGEN_ASSIGN_VML_H
|
||||
#define EIGEN_ASSIGN_VML_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Dst, typename Src>
|
||||
class vml_assign_traits
|
||||
{
|
||||
private:
|
||||
enum {
|
||||
DstHasDirectAccess = Dst::Flags & DirectAccessBit,
|
||||
SrcHasDirectAccess = Src::Flags & DirectAccessBit,
|
||||
StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
|
||||
InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
|
||||
: int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
|
||||
: int(Dst::RowsAtCompileTime),
|
||||
InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
|
||||
: int(Dst::Flags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
|
||||
: int(Dst::MaxRowsAtCompileTime),
|
||||
MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
|
||||
|
||||
MightEnableVml = StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
|
||||
MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit),
|
||||
VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize,
|
||||
LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD
|
||||
};
|
||||
public:
|
||||
enum {
|
||||
EnableVml = MightEnableVml && LargeEnough,
|
||||
Traversal = MightLinearize ? LinearTraversal : DefaultTraversal
|
||||
};
|
||||
};
|
||||
|
||||
#define EIGEN_PP_EXPAND(ARG) ARG
|
||||
#if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1)
|
||||
#define EIGEN_VMLMODE_EXPAND_LA , VML_HA
|
||||
#else
|
||||
#define EIGEN_VMLMODE_EXPAND_LA , VML_LA
|
||||
#endif
|
||||
|
||||
#define EIGEN_VMLMODE_EXPAND__
|
||||
|
||||
#define EIGEN_VMLMODE_PREFIX_LA vm
|
||||
#define EIGEN_VMLMODE_PREFIX__ v
|
||||
#define EIGEN_VMLMODE_PREFIX(VMLMODE) EIGEN_CAT(EIGEN_VMLMODE_PREFIX_,VMLMODE)
|
||||
|
||||
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
|
||||
template< typename DstXprType, typename SrcXprNested> \
|
||||
struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE,EIGENTYPE>, \
|
||||
Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \
|
||||
typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) { \
|
||||
resize_if_allowed(dst, src, func); \
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
|
||||
if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) { \
|
||||
VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \
|
||||
(VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) ); \
|
||||
} else { \
|
||||
const Index outerSize = dst.outerSize(); \
|
||||
for(Index outer = 0; outer < outerSize; ++outer) { \
|
||||
const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : \
|
||||
&(src.nestedExpression().coeffRef(0, outer)); \
|
||||
EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \
|
||||
VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, \
|
||||
(VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE)); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
}; \
|
||||
|
||||
|
||||
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),s##VMLOP), float, float, VMLMODE) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),d##VMLOP), double, double, VMLMODE)
|
||||
|
||||
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),c##VMLOP), scomplex, MKL_Complex8, VMLMODE) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),z##VMLOP), dcomplex, MKL_Complex16, VMLMODE)
|
||||
|
||||
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP, VMLMODE) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE)
|
||||
|
||||
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sin, Sin, LA)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(asin, Asin, LA)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sinh, Sinh, LA)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cos, Cos, LA)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(acos, Acos, LA)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cosh, Cosh, LA)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tan, Tan, LA)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(atan, Atan, LA)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tanh, Tanh, LA)
|
||||
// EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs, _)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(exp, Exp, LA)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log, Ln, LA)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log10, Log10, LA)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sqrt, Sqrt, _)
|
||||
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr, _)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(arg, Arg, _)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(round, Round, _)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(floor, Floor, _)
|
||||
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _)
|
||||
|
||||
#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
|
||||
template< typename DstXprType, typename SrcXprNested, typename Plain> \
|
||||
struct Assignment<DstXprType, CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested, \
|
||||
const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> >, assign_op<EIGENTYPE,EIGENTYPE>, \
|
||||
Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \
|
||||
typedef CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested, \
|
||||
const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> > SrcXprType; \
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) { \
|
||||
resize_if_allowed(dst, src, func); \
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
|
||||
VMLTYPE exponent = reinterpret_cast<const VMLTYPE&>(src.rhs().functor().m_other); \
|
||||
if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) \
|
||||
{ \
|
||||
VMLOP( dst.size(), (const VMLTYPE*)src.lhs().data(), exponent, \
|
||||
(VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) ); \
|
||||
} else { \
|
||||
const Index outerSize = dst.outerSize(); \
|
||||
for(Index outer = 0; outer < outerSize; ++outer) { \
|
||||
const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.lhs().coeffRef(outer,0)) : \
|
||||
&(src.lhs().coeffRef(0, outer)); \
|
||||
EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \
|
||||
VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, exponent, \
|
||||
(VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE)); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
};
|
||||
|
||||
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmsPowx, float, float, LA)
|
||||
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdPowx, double, double, LA)
|
||||
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcPowx, scomplex, MKL_Complex8, LA)
|
||||
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzPowx, dcomplex, MKL_Complex16, LA)
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_ASSIGN_VML_H
|
||||
353
external/include/eigen3/Eigen/src/Core/BandMatrix.h
vendored
Normal file
353
external/include/eigen3/Eigen/src/Core/BandMatrix.h
vendored
Normal file
@@ -0,0 +1,353 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_BANDMATRIX_H
|
||||
#define EIGEN_BANDMATRIX_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Derived>
|
||||
class BandMatrixBase : public EigenBase<Derived>
|
||||
{
|
||||
public:
|
||||
|
||||
enum {
|
||||
Flags = internal::traits<Derived>::Flags,
|
||||
CoeffReadCost = internal::traits<Derived>::CoeffReadCost,
|
||||
RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
|
||||
ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
|
||||
MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,
|
||||
Supers = internal::traits<Derived>::Supers,
|
||||
Subs = internal::traits<Derived>::Subs,
|
||||
Options = internal::traits<Derived>::Options
|
||||
};
|
||||
typedef typename internal::traits<Derived>::Scalar Scalar;
|
||||
typedef Matrix<Scalar,RowsAtCompileTime,ColsAtCompileTime> DenseMatrixType;
|
||||
typedef typename DenseMatrixType::StorageIndex StorageIndex;
|
||||
typedef typename internal::traits<Derived>::CoefficientsType CoefficientsType;
|
||||
typedef EigenBase<Derived> Base;
|
||||
|
||||
protected:
|
||||
enum {
|
||||
DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic))
|
||||
? 1 + Supers + Subs
|
||||
: Dynamic,
|
||||
SizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime,ColsAtCompileTime)
|
||||
};
|
||||
|
||||
public:
|
||||
|
||||
using Base::derived;
|
||||
using Base::rows;
|
||||
using Base::cols;
|
||||
|
||||
/** \returns the number of super diagonals */
|
||||
inline Index supers() const { return derived().supers(); }
|
||||
|
||||
/** \returns the number of sub diagonals */
|
||||
inline Index subs() const { return derived().subs(); }
|
||||
|
||||
/** \returns an expression of the underlying coefficient matrix */
|
||||
inline const CoefficientsType& coeffs() const { return derived().coeffs(); }
|
||||
|
||||
/** \returns an expression of the underlying coefficient matrix */
|
||||
inline CoefficientsType& coeffs() { return derived().coeffs(); }
|
||||
|
||||
/** \returns a vector expression of the \a i -th column,
|
||||
* only the meaningful part is returned.
|
||||
* \warning the internal storage must be column major. */
|
||||
inline Block<CoefficientsType,Dynamic,1> col(Index i)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((Options&RowMajor)==0,THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);
|
||||
Index start = 0;
|
||||
Index len = coeffs().rows();
|
||||
if (i<=supers())
|
||||
{
|
||||
start = supers()-i;
|
||||
len = (std::min)(rows(),std::max<Index>(0,coeffs().rows() - (supers()-i)));
|
||||
}
|
||||
else if (i>=rows()-subs())
|
||||
len = std::max<Index>(0,coeffs().rows() - (i + 1 - rows() + subs()));
|
||||
return Block<CoefficientsType,Dynamic,1>(coeffs(), start, i, len, 1);
|
||||
}
|
||||
|
||||
/** \returns a vector expression of the main diagonal */
|
||||
inline Block<CoefficientsType,1,SizeAtCompileTime> diagonal()
|
||||
{ return Block<CoefficientsType,1,SizeAtCompileTime>(coeffs(),supers(),0,1,(std::min)(rows(),cols())); }
|
||||
|
||||
/** \returns a vector expression of the main diagonal (const version) */
|
||||
inline const Block<const CoefficientsType,1,SizeAtCompileTime> diagonal() const
|
||||
{ return Block<const CoefficientsType,1,SizeAtCompileTime>(coeffs(),supers(),0,1,(std::min)(rows(),cols())); }
|
||||
|
||||
template<int Index> struct DiagonalIntReturnType {
|
||||
enum {
|
||||
ReturnOpposite = (Options&SelfAdjoint) && (((Index)>0 && Supers==0) || ((Index)<0 && Subs==0)),
|
||||
Conjugate = ReturnOpposite && NumTraits<Scalar>::IsComplex,
|
||||
ActualIndex = ReturnOpposite ? -Index : Index,
|
||||
DiagonalSize = (RowsAtCompileTime==Dynamic || ColsAtCompileTime==Dynamic)
|
||||
? Dynamic
|
||||
: (ActualIndex<0
|
||||
? EIGEN_SIZE_MIN_PREFER_DYNAMIC(ColsAtCompileTime, RowsAtCompileTime + ActualIndex)
|
||||
: EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime, ColsAtCompileTime - ActualIndex))
|
||||
};
|
||||
typedef Block<CoefficientsType,1, DiagonalSize> BuildType;
|
||||
typedef typename internal::conditional<Conjugate,
|
||||
CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>,BuildType >,
|
||||
BuildType>::type Type;
|
||||
};
|
||||
|
||||
/** \returns a vector expression of the \a N -th sub or super diagonal */
|
||||
template<int N> inline typename DiagonalIntReturnType<N>::Type diagonal()
|
||||
{
|
||||
return typename DiagonalIntReturnType<N>::BuildType(coeffs(), supers()-N, (std::max)(0,N), 1, diagonalLength(N));
|
||||
}
|
||||
|
||||
/** \returns a vector expression of the \a N -th sub or super diagonal */
|
||||
template<int N> inline const typename DiagonalIntReturnType<N>::Type diagonal() const
|
||||
{
|
||||
return typename DiagonalIntReturnType<N>::BuildType(coeffs(), supers()-N, (std::max)(0,N), 1, diagonalLength(N));
|
||||
}
|
||||
|
||||
/** \returns a vector expression of the \a i -th sub or super diagonal */
|
||||
inline Block<CoefficientsType,1,Dynamic> diagonal(Index i)
|
||||
{
|
||||
eigen_assert((i<0 && -i<=subs()) || (i>=0 && i<=supers()));
|
||||
return Block<CoefficientsType,1,Dynamic>(coeffs(), supers()-i, std::max<Index>(0,i), 1, diagonalLength(i));
|
||||
}
|
||||
|
||||
/** \returns a vector expression of the \a i -th sub or super diagonal */
|
||||
inline const Block<const CoefficientsType,1,Dynamic> diagonal(Index i) const
|
||||
{
|
||||
eigen_assert((i<0 && -i<=subs()) || (i>=0 && i<=supers()));
|
||||
return Block<const CoefficientsType,1,Dynamic>(coeffs(), supers()-i, std::max<Index>(0,i), 1, diagonalLength(i));
|
||||
}
|
||||
|
||||
template<typename Dest> inline void evalTo(Dest& dst) const
|
||||
{
|
||||
dst.resize(rows(),cols());
|
||||
dst.setZero();
|
||||
dst.diagonal() = diagonal();
|
||||
for (Index i=1; i<=supers();++i)
|
||||
dst.diagonal(i) = diagonal(i);
|
||||
for (Index i=1; i<=subs();++i)
|
||||
dst.diagonal(-i) = diagonal(-i);
|
||||
}
|
||||
|
||||
DenseMatrixType toDenseMatrix() const
|
||||
{
|
||||
DenseMatrixType res(rows(),cols());
|
||||
evalTo(res);
|
||||
return res;
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
inline Index diagonalLength(Index i) const
|
||||
{ return i<0 ? (std::min)(cols(),rows()+i) : (std::min)(rows(),cols()-i); }
|
||||
};
|
||||
|
||||
/**
|
||||
* \class BandMatrix
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Represents a rectangular matrix with a banded storage
|
||||
*
|
||||
* \tparam _Scalar Numeric type, i.e. float, double, int
|
||||
* \tparam _Rows Number of rows, or \b Dynamic
|
||||
* \tparam _Cols Number of columns, or \b Dynamic
|
||||
* \tparam _Supers Number of super diagonal
|
||||
* \tparam _Subs Number of sub diagonal
|
||||
* \tparam _Options A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint
|
||||
* The former controls \ref TopicStorageOrders "storage order", and defaults to
|
||||
* column-major. The latter controls whether the matrix represents a selfadjoint
|
||||
* matrix in which case either Supers of Subs have to be null.
|
||||
*
|
||||
* \sa class TridiagonalMatrix
|
||||
*/
|
||||
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Supers, int _Subs, int _Options>
|
||||
struct traits<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> >
|
||||
{
|
||||
typedef _Scalar Scalar;
|
||||
typedef Dense StorageKind;
|
||||
typedef Eigen::Index StorageIndex;
|
||||
enum {
|
||||
CoeffReadCost = NumTraits<Scalar>::ReadCost,
|
||||
RowsAtCompileTime = _Rows,
|
||||
ColsAtCompileTime = _Cols,
|
||||
MaxRowsAtCompileTime = _Rows,
|
||||
MaxColsAtCompileTime = _Cols,
|
||||
Flags = LvalueBit,
|
||||
Supers = _Supers,
|
||||
Subs = _Subs,
|
||||
Options = _Options,
|
||||
DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) ? 1 + Supers + Subs : Dynamic
|
||||
};
|
||||
typedef Matrix<Scalar,DataRowsAtCompileTime,ColsAtCompileTime,Options&RowMajor?RowMajor:ColMajor> CoefficientsType;
|
||||
};
|
||||
|
||||
template<typename _Scalar, int Rows, int Cols, int Supers, int Subs, int Options>
|
||||
class BandMatrix : public BandMatrixBase<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Options> >
|
||||
{
|
||||
public:
|
||||
|
||||
typedef typename internal::traits<BandMatrix>::Scalar Scalar;
|
||||
typedef typename internal::traits<BandMatrix>::StorageIndex StorageIndex;
|
||||
typedef typename internal::traits<BandMatrix>::CoefficientsType CoefficientsType;
|
||||
|
||||
explicit inline BandMatrix(Index rows=Rows, Index cols=Cols, Index supers=Supers, Index subs=Subs)
|
||||
: m_coeffs(1+supers+subs,cols),
|
||||
m_rows(rows), m_supers(supers), m_subs(subs)
|
||||
{
|
||||
}
|
||||
|
||||
/** \returns the number of columns */
|
||||
inline Index rows() const { return m_rows.value(); }
|
||||
|
||||
/** \returns the number of rows */
|
||||
inline Index cols() const { return m_coeffs.cols(); }
|
||||
|
||||
/** \returns the number of super diagonals */
|
||||
inline Index supers() const { return m_supers.value(); }
|
||||
|
||||
/** \returns the number of sub diagonals */
|
||||
inline Index subs() const { return m_subs.value(); }
|
||||
|
||||
inline const CoefficientsType& coeffs() const { return m_coeffs; }
|
||||
inline CoefficientsType& coeffs() { return m_coeffs; }
|
||||
|
||||
protected:
|
||||
|
||||
CoefficientsType m_coeffs;
|
||||
internal::variable_if_dynamic<Index, Rows> m_rows;
|
||||
internal::variable_if_dynamic<Index, Supers> m_supers;
|
||||
internal::variable_if_dynamic<Index, Subs> m_subs;
|
||||
};
|
||||
|
||||
template<typename _CoefficientsType,int _Rows, int _Cols, int _Supers, int _Subs,int _Options>
|
||||
class BandMatrixWrapper;
|
||||
|
||||
template<typename _CoefficientsType,int _Rows, int _Cols, int _Supers, int _Subs,int _Options>
|
||||
struct traits<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >
|
||||
{
|
||||
typedef typename _CoefficientsType::Scalar Scalar;
|
||||
typedef typename _CoefficientsType::StorageKind StorageKind;
|
||||
typedef typename _CoefficientsType::StorageIndex StorageIndex;
|
||||
enum {
|
||||
CoeffReadCost = internal::traits<_CoefficientsType>::CoeffReadCost,
|
||||
RowsAtCompileTime = _Rows,
|
||||
ColsAtCompileTime = _Cols,
|
||||
MaxRowsAtCompileTime = _Rows,
|
||||
MaxColsAtCompileTime = _Cols,
|
||||
Flags = LvalueBit,
|
||||
Supers = _Supers,
|
||||
Subs = _Subs,
|
||||
Options = _Options,
|
||||
DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) ? 1 + Supers + Subs : Dynamic
|
||||
};
|
||||
typedef _CoefficientsType CoefficientsType;
|
||||
};
|
||||
|
||||
template<typename _CoefficientsType,int _Rows, int _Cols, int _Supers, int _Subs,int _Options>
|
||||
class BandMatrixWrapper : public BandMatrixBase<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >
|
||||
{
|
||||
public:
|
||||
|
||||
typedef typename internal::traits<BandMatrixWrapper>::Scalar Scalar;
|
||||
typedef typename internal::traits<BandMatrixWrapper>::CoefficientsType CoefficientsType;
|
||||
typedef typename internal::traits<BandMatrixWrapper>::StorageIndex StorageIndex;
|
||||
|
||||
explicit inline BandMatrixWrapper(const CoefficientsType& coeffs, Index rows=_Rows, Index cols=_Cols, Index supers=_Supers, Index subs=_Subs)
|
||||
: m_coeffs(coeffs),
|
||||
m_rows(rows), m_supers(supers), m_subs(subs)
|
||||
{
|
||||
EIGEN_UNUSED_VARIABLE(cols);
|
||||
//internal::assert(coeffs.cols()==cols() && (supers()+subs()+1)==coeffs.rows());
|
||||
}
|
||||
|
||||
/** \returns the number of columns */
|
||||
inline Index rows() const { return m_rows.value(); }
|
||||
|
||||
/** \returns the number of rows */
|
||||
inline Index cols() const { return m_coeffs.cols(); }
|
||||
|
||||
/** \returns the number of super diagonals */
|
||||
inline Index supers() const { return m_supers.value(); }
|
||||
|
||||
/** \returns the number of sub diagonals */
|
||||
inline Index subs() const { return m_subs.value(); }
|
||||
|
||||
inline const CoefficientsType& coeffs() const { return m_coeffs; }
|
||||
|
||||
protected:
|
||||
|
||||
const CoefficientsType& m_coeffs;
|
||||
internal::variable_if_dynamic<Index, _Rows> m_rows;
|
||||
internal::variable_if_dynamic<Index, _Supers> m_supers;
|
||||
internal::variable_if_dynamic<Index, _Subs> m_subs;
|
||||
};
|
||||
|
||||
/**
|
||||
* \class TridiagonalMatrix
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Represents a tridiagonal matrix with a compact banded storage
|
||||
*
|
||||
* \tparam Scalar Numeric type, i.e. float, double, int
|
||||
* \tparam Size Number of rows and cols, or \b Dynamic
|
||||
* \tparam Options Can be 0 or \b SelfAdjoint
|
||||
*
|
||||
* \sa class BandMatrix
|
||||
*/
|
||||
template<typename Scalar, int Size, int Options>
|
||||
class TridiagonalMatrix : public BandMatrix<Scalar,Size,Size,Options&SelfAdjoint?0:1,1,Options|RowMajor>
|
||||
{
|
||||
typedef BandMatrix<Scalar,Size,Size,Options&SelfAdjoint?0:1,1,Options|RowMajor> Base;
|
||||
typedef typename Base::StorageIndex StorageIndex;
|
||||
public:
|
||||
explicit TridiagonalMatrix(Index size = Size) : Base(size,size,Options&SelfAdjoint?0:1,1) {}
|
||||
|
||||
inline typename Base::template DiagonalIntReturnType<1>::Type super()
|
||||
{ return Base::template diagonal<1>(); }
|
||||
inline const typename Base::template DiagonalIntReturnType<1>::Type super() const
|
||||
{ return Base::template diagonal<1>(); }
|
||||
inline typename Base::template DiagonalIntReturnType<-1>::Type sub()
|
||||
{ return Base::template diagonal<-1>(); }
|
||||
inline const typename Base::template DiagonalIntReturnType<-1>::Type sub() const
|
||||
{ return Base::template diagonal<-1>(); }
|
||||
protected:
|
||||
};
|
||||
|
||||
|
||||
struct BandShape {};
|
||||
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Supers, int _Subs, int _Options>
|
||||
struct evaluator_traits<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> >
|
||||
: public evaluator_traits_base<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> >
|
||||
{
|
||||
typedef BandShape Shape;
|
||||
};
|
||||
|
||||
template<typename _CoefficientsType,int _Rows, int _Cols, int _Supers, int _Subs,int _Options>
|
||||
struct evaluator_traits<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >
|
||||
: public evaluator_traits_base<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >
|
||||
{
|
||||
typedef BandShape Shape;
|
||||
};
|
||||
|
||||
template<> struct AssignmentKind<DenseShape,BandShape> { typedef EigenBase2EigenBase Kind; };
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_BANDMATRIX_H
|
||||
452
external/include/eigen3/Eigen/src/Core/Block.h
vendored
Normal file
452
external/include/eigen3/Eigen/src/Core/Block.h
vendored
Normal file
@@ -0,0 +1,452 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_BLOCK_H
|
||||
#define EIGEN_BLOCK_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
|
||||
struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprType>
|
||||
{
|
||||
typedef typename traits<XprType>::Scalar Scalar;
|
||||
typedef typename traits<XprType>::StorageKind StorageKind;
|
||||
typedef typename traits<XprType>::XprKind XprKind;
|
||||
typedef typename ref_selector<XprType>::type XprTypeNested;
|
||||
typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
|
||||
enum{
|
||||
MatrixRows = traits<XprType>::RowsAtCompileTime,
|
||||
MatrixCols = traits<XprType>::ColsAtCompileTime,
|
||||
RowsAtCompileTime = MatrixRows == 0 ? 0 : BlockRows,
|
||||
ColsAtCompileTime = MatrixCols == 0 ? 0 : BlockCols,
|
||||
MaxRowsAtCompileTime = BlockRows==0 ? 0
|
||||
: RowsAtCompileTime != Dynamic ? int(RowsAtCompileTime)
|
||||
: int(traits<XprType>::MaxRowsAtCompileTime),
|
||||
MaxColsAtCompileTime = BlockCols==0 ? 0
|
||||
: ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime)
|
||||
: int(traits<XprType>::MaxColsAtCompileTime),
|
||||
|
||||
XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0,
|
||||
IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
|
||||
: (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
|
||||
: XprTypeIsRowMajor,
|
||||
HasSameStorageOrderAsXprType = (IsRowMajor == XprTypeIsRowMajor),
|
||||
InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
|
||||
InnerStrideAtCompileTime = HasSameStorageOrderAsXprType
|
||||
? int(inner_stride_at_compile_time<XprType>::ret)
|
||||
: int(outer_stride_at_compile_time<XprType>::ret),
|
||||
OuterStrideAtCompileTime = HasSameStorageOrderAsXprType
|
||||
? int(outer_stride_at_compile_time<XprType>::ret)
|
||||
: int(inner_stride_at_compile_time<XprType>::ret),
|
||||
|
||||
// FIXME, this traits is rather specialized for dense object and it needs to be cleaned further
|
||||
FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
|
||||
FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
|
||||
Flags = (traits<XprType>::Flags & (DirectAccessBit | (InnerPanel?CompressedAccessBit:0))) | FlagsLvalueBit | FlagsRowMajorBit,
|
||||
// FIXME DirectAccessBit should not be handled by expressions
|
||||
//
|
||||
// Alignment is needed by MapBase's assertions
|
||||
// We can sefely set it to false here. Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator
|
||||
Alignment = 0
|
||||
};
|
||||
};
|
||||
|
||||
template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false,
|
||||
bool HasDirectAccess = internal::has_direct_access<XprType>::ret> class BlockImpl_dense;
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, typename StorageKind> class BlockImpl;
|
||||
|
||||
/** \class Block
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of a fixed-size or dynamic-size block
|
||||
*
|
||||
* \tparam XprType the type of the expression in which we are taking a block
|
||||
* \tparam BlockRows the number of rows of the block we are taking at compile time (optional)
|
||||
* \tparam BlockCols the number of columns of the block we are taking at compile time (optional)
|
||||
* \tparam InnerPanel is true, if the block maps to a set of rows of a row major matrix or
|
||||
* to set of columns of a column major matrix (optional). The parameter allows to determine
|
||||
* at compile time whether aligned access is possible on the block expression.
|
||||
*
|
||||
* This class represents an expression of either a fixed-size or dynamic-size block. It is the return
|
||||
* type of DenseBase::block(Index,Index,Index,Index) and DenseBase::block<int,int>(Index,Index) and
|
||||
* most of the time this is the only way it is used.
|
||||
*
|
||||
* However, if you want to directly maniputate block expressions,
|
||||
* for instance if you want to write a function returning such an expression, you
|
||||
* will need to use this class.
|
||||
*
|
||||
* Here is an example illustrating the dynamic case:
|
||||
* \include class_Block.cpp
|
||||
* Output: \verbinclude class_Block.out
|
||||
*
|
||||
* \note Even though this expression has dynamic size, in the case where \a XprType
|
||||
* has fixed size, this expression inherits a fixed maximal size which means that evaluating
|
||||
* it does not cause a dynamic memory allocation.
|
||||
*
|
||||
* Here is an example illustrating the fixed-size case:
|
||||
* \include class_FixedBlock.cpp
|
||||
* Output: \verbinclude class_FixedBlock.out
|
||||
*
|
||||
* \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock
|
||||
*/
|
||||
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class Block
|
||||
: public BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, typename internal::traits<XprType>::StorageKind>
|
||||
{
|
||||
typedef BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, typename internal::traits<XprType>::StorageKind> Impl;
|
||||
public:
|
||||
//typedef typename Impl::Base Base;
|
||||
typedef Impl Base;
|
||||
EIGEN_GENERIC_PUBLIC_INTERFACE(Block)
|
||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block)
|
||||
|
||||
typedef typename internal::remove_all<XprType>::type NestedExpression;
|
||||
|
||||
/** Column or Row constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Block(XprType& xpr, Index i) : Impl(xpr,i)
|
||||
{
|
||||
eigen_assert( (i>=0) && (
|
||||
((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && i<xpr.rows())
|
||||
||((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && i<xpr.cols())));
|
||||
}
|
||||
|
||||
/** Fixed-size constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Block(XprType& xpr, Index startRow, Index startCol)
|
||||
: Impl(xpr, startRow, startCol)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic,THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE)
|
||||
eigen_assert(startRow >= 0 && BlockRows >= 0 && startRow + BlockRows <= xpr.rows()
|
||||
&& startCol >= 0 && BlockCols >= 0 && startCol + BlockCols <= xpr.cols());
|
||||
}
|
||||
|
||||
/** Dynamic-size constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Block(XprType& xpr,
|
||||
Index startRow, Index startCol,
|
||||
Index blockRows, Index blockCols)
|
||||
: Impl(xpr, startRow, startCol, blockRows, blockCols)
|
||||
{
|
||||
eigen_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows)
|
||||
&& (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols));
|
||||
eigen_assert(startRow >= 0 && blockRows >= 0 && startRow <= xpr.rows() - blockRows
|
||||
&& startCol >= 0 && blockCols >= 0 && startCol <= xpr.cols() - blockCols);
|
||||
}
|
||||
};
|
||||
|
||||
// The generic default implementation for dense block simplu forward to the internal::BlockImpl_dense
|
||||
// that must be specialized for direct and non-direct access...
|
||||
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
|
||||
class BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, Dense>
|
||||
: public internal::BlockImpl_dense<XprType, BlockRows, BlockCols, InnerPanel>
|
||||
{
|
||||
typedef internal::BlockImpl_dense<XprType, BlockRows, BlockCols, InnerPanel> Impl;
|
||||
typedef typename XprType::StorageIndex StorageIndex;
|
||||
public:
|
||||
typedef Impl Base;
|
||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
|
||||
EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {}
|
||||
EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index startRow, Index startCol) : Impl(xpr, startRow, startCol) {}
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
|
||||
: Impl(xpr, startRow, startCol, blockRows, blockCols) {}
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
|
||||
/** \internal Internal implementation of dense Blocks in the general case. */
|
||||
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool HasDirectAccess> class BlockImpl_dense
|
||||
: public internal::dense_xpr_base<Block<XprType, BlockRows, BlockCols, InnerPanel> >::type
|
||||
{
|
||||
typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
|
||||
typedef typename internal::ref_selector<XprType>::non_const_type XprTypeNested;
|
||||
public:
|
||||
|
||||
typedef typename internal::dense_xpr_base<BlockType>::type Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(BlockType)
|
||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense)
|
||||
|
||||
// class InnerIterator; // FIXME apparently never used
|
||||
|
||||
/** Column or Row constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline BlockImpl_dense(XprType& xpr, Index i)
|
||||
: m_xpr(xpr),
|
||||
// It is a row if and only if BlockRows==1 and BlockCols==XprType::ColsAtCompileTime,
|
||||
// and it is a column if and only if BlockRows==XprType::RowsAtCompileTime and BlockCols==1,
|
||||
// all other cases are invalid.
|
||||
// The case a 1x1 matrix seems ambiguous, but the result is the same anyway.
|
||||
m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0),
|
||||
m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0),
|
||||
m_blockRows(BlockRows==1 ? 1 : xpr.rows()),
|
||||
m_blockCols(BlockCols==1 ? 1 : xpr.cols())
|
||||
{}
|
||||
|
||||
/** Fixed-size constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
|
||||
: m_xpr(xpr), m_startRow(startRow), m_startCol(startCol),
|
||||
m_blockRows(BlockRows), m_blockCols(BlockCols)
|
||||
{}
|
||||
|
||||
/** Dynamic-size constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline BlockImpl_dense(XprType& xpr,
|
||||
Index startRow, Index startCol,
|
||||
Index blockRows, Index blockCols)
|
||||
: m_xpr(xpr), m_startRow(startRow), m_startCol(startCol),
|
||||
m_blockRows(blockRows), m_blockCols(blockCols)
|
||||
{}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Index rows() const { return m_blockRows.value(); }
|
||||
EIGEN_DEVICE_FUNC inline Index cols() const { return m_blockCols.value(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index rowId, Index colId)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(XprType)
|
||||
return m_xpr.coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index rowId, Index colId) const
|
||||
{
|
||||
return m_xpr.derived().coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index rowId, Index colId) const
|
||||
{
|
||||
return m_xpr.coeff(rowId + m_startRow.value(), colId + m_startCol.value());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index index)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(XprType)
|
||||
return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
|
||||
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index index) const
|
||||
{
|
||||
return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
|
||||
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return m_xpr.coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
|
||||
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline PacketScalar packet(Index rowId, Index colId) const
|
||||
{
|
||||
return m_xpr.template packet<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value());
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
|
||||
{
|
||||
m_xpr.template writePacket<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value(), val);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline PacketScalar packet(Index index) const
|
||||
{
|
||||
return m_xpr.template packet<Unaligned>
|
||||
(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
|
||||
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline void writePacket(Index index, const PacketScalar& val)
|
||||
{
|
||||
m_xpr.template writePacket<Unaligned>
|
||||
(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
|
||||
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0), val);
|
||||
}
|
||||
|
||||
#ifdef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** \sa MapBase::data() */
|
||||
EIGEN_DEVICE_FUNC inline const Scalar* data() const;
|
||||
EIGEN_DEVICE_FUNC inline Index innerStride() const;
|
||||
EIGEN_DEVICE_FUNC inline Index outerStride() const;
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<XprTypeNested>::type& nestedExpression() const
|
||||
{
|
||||
return m_xpr;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
XprType& nestedExpression() { return m_xpr; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
StorageIndex startRow() const
|
||||
{
|
||||
return m_startRow.value();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
StorageIndex startCol() const
|
||||
{
|
||||
return m_startCol.value();
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
XprTypeNested m_xpr;
|
||||
const internal::variable_if_dynamic<StorageIndex, (XprType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
|
||||
const internal::variable_if_dynamic<StorageIndex, (XprType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
|
||||
const internal::variable_if_dynamic<StorageIndex, RowsAtCompileTime> m_blockRows;
|
||||
const internal::variable_if_dynamic<StorageIndex, ColsAtCompileTime> m_blockCols;
|
||||
};
|
||||
|
||||
/** \internal Internal implementation of dense Blocks in the direct access case.*/
|
||||
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
|
||||
class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
|
||||
: public MapBase<Block<XprType, BlockRows, BlockCols, InnerPanel> >
|
||||
{
|
||||
typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
|
||||
typedef typename internal::ref_selector<XprType>::non_const_type XprTypeNested;
|
||||
enum {
|
||||
XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0
|
||||
};
|
||||
public:
|
||||
|
||||
typedef MapBase<BlockType> Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(BlockType)
|
||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense)
|
||||
|
||||
/** Column or Row constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline BlockImpl_dense(XprType& xpr, Index i)
|
||||
: Base(xpr.data() + i * ( ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && (!XprTypeIsRowMajor))
|
||||
|| ((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && ( XprTypeIsRowMajor)) ? xpr.innerStride() : xpr.outerStride()),
|
||||
BlockRows==1 ? 1 : xpr.rows(),
|
||||
BlockCols==1 ? 1 : xpr.cols()),
|
||||
m_xpr(xpr),
|
||||
m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0),
|
||||
m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0)
|
||||
{
|
||||
init();
|
||||
}
|
||||
|
||||
/** Fixed-size constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
|
||||
: Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol)),
|
||||
m_xpr(xpr), m_startRow(startRow), m_startCol(startCol)
|
||||
{
|
||||
init();
|
||||
}
|
||||
|
||||
/** Dynamic-size constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline BlockImpl_dense(XprType& xpr,
|
||||
Index startRow, Index startCol,
|
||||
Index blockRows, Index blockCols)
|
||||
: Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol), blockRows, blockCols),
|
||||
m_xpr(xpr), m_startRow(startRow), m_startCol(startCol)
|
||||
{
|
||||
init();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<XprTypeNested>::type& nestedExpression() const
|
||||
{
|
||||
return m_xpr;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
XprType& nestedExpression() { return m_xpr; }
|
||||
|
||||
/** \sa MapBase::innerStride() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const
|
||||
{
|
||||
return internal::traits<BlockType>::HasSameStorageOrderAsXprType
|
||||
? m_xpr.innerStride()
|
||||
: m_xpr.outerStride();
|
||||
}
|
||||
|
||||
/** \sa MapBase::outerStride() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const
|
||||
{
|
||||
return m_outerStride;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
StorageIndex startRow() const
|
||||
{
|
||||
return m_startRow.value();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
StorageIndex startCol() const
|
||||
{
|
||||
return m_startCol.value();
|
||||
}
|
||||
|
||||
#ifndef __SUNPRO_CC
|
||||
// FIXME sunstudio is not friendly with the above friend...
|
||||
// META-FIXME there is no 'friend' keyword around here. Is this obsolete?
|
||||
protected:
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** \internal used by allowAligned() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline BlockImpl_dense(XprType& xpr, const Scalar* data, Index blockRows, Index blockCols)
|
||||
: Base(data, blockRows, blockCols), m_xpr(xpr)
|
||||
{
|
||||
init();
|
||||
}
|
||||
#endif
|
||||
|
||||
protected:
|
||||
EIGEN_DEVICE_FUNC
|
||||
void init()
|
||||
{
|
||||
m_outerStride = internal::traits<BlockType>::HasSameStorageOrderAsXprType
|
||||
? m_xpr.outerStride()
|
||||
: m_xpr.innerStride();
|
||||
}
|
||||
|
||||
XprTypeNested m_xpr;
|
||||
const internal::variable_if_dynamic<StorageIndex, (XprType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
|
||||
const internal::variable_if_dynamic<StorageIndex, (XprType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
|
||||
Index m_outerStride;
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_BLOCK_H
|
||||
164
external/include/eigen3/Eigen/src/Core/BooleanRedux.h
vendored
Normal file
164
external/include/eigen3/Eigen/src/Core/BooleanRedux.h
vendored
Normal file
@@ -0,0 +1,164 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_ALLANDANY_H
|
||||
#define EIGEN_ALLANDANY_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Derived, int UnrollCount>
|
||||
struct all_unroller
|
||||
{
|
||||
typedef typename Derived::ExpressionTraits Traits;
|
||||
enum {
|
||||
col = (UnrollCount-1) / Traits::RowsAtCompileTime,
|
||||
row = (UnrollCount-1) % Traits::RowsAtCompileTime
|
||||
};
|
||||
|
||||
static inline bool run(const Derived &mat)
|
||||
{
|
||||
return all_unroller<Derived, UnrollCount-1>::run(mat) && mat.coeff(row, col);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived>
|
||||
struct all_unroller<Derived, 0>
|
||||
{
|
||||
static inline bool run(const Derived &/*mat*/) { return true; }
|
||||
};
|
||||
|
||||
template<typename Derived>
|
||||
struct all_unroller<Derived, Dynamic>
|
||||
{
|
||||
static inline bool run(const Derived &) { return false; }
|
||||
};
|
||||
|
||||
template<typename Derived, int UnrollCount>
|
||||
struct any_unroller
|
||||
{
|
||||
typedef typename Derived::ExpressionTraits Traits;
|
||||
enum {
|
||||
col = (UnrollCount-1) / Traits::RowsAtCompileTime,
|
||||
row = (UnrollCount-1) % Traits::RowsAtCompileTime
|
||||
};
|
||||
|
||||
static inline bool run(const Derived &mat)
|
||||
{
|
||||
return any_unroller<Derived, UnrollCount-1>::run(mat) || mat.coeff(row, col);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived>
|
||||
struct any_unroller<Derived, 0>
|
||||
{
|
||||
static inline bool run(const Derived & /*mat*/) { return false; }
|
||||
};
|
||||
|
||||
template<typename Derived>
|
||||
struct any_unroller<Derived, Dynamic>
|
||||
{
|
||||
static inline bool run(const Derived &) { return false; }
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \returns true if all coefficients are true
|
||||
*
|
||||
* Example: \include MatrixBase_all.cpp
|
||||
* Output: \verbinclude MatrixBase_all.out
|
||||
*
|
||||
* \sa any(), Cwise::operator<()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline bool DenseBase<Derived>::all() const
|
||||
{
|
||||
typedef internal::evaluator<Derived> Evaluator;
|
||||
enum {
|
||||
unroll = SizeAtCompileTime != Dynamic
|
||||
&& SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
|
||||
};
|
||||
Evaluator evaluator(derived());
|
||||
if(unroll)
|
||||
return internal::all_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(evaluator);
|
||||
else
|
||||
{
|
||||
for(Index j = 0; j < cols(); ++j)
|
||||
for(Index i = 0; i < rows(); ++i)
|
||||
if (!evaluator.coeff(i, j)) return false;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/** \returns true if at least one coefficient is true
|
||||
*
|
||||
* \sa all()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline bool DenseBase<Derived>::any() const
|
||||
{
|
||||
typedef internal::evaluator<Derived> Evaluator;
|
||||
enum {
|
||||
unroll = SizeAtCompileTime != Dynamic
|
||||
&& SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
|
||||
};
|
||||
Evaluator evaluator(derived());
|
||||
if(unroll)
|
||||
return internal::any_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(evaluator);
|
||||
else
|
||||
{
|
||||
for(Index j = 0; j < cols(); ++j)
|
||||
for(Index i = 0; i < rows(); ++i)
|
||||
if (evaluator.coeff(i, j)) return true;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/** \returns the number of coefficients which evaluate to true
|
||||
*
|
||||
* \sa all(), any()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline Eigen::Index DenseBase<Derived>::count() const
|
||||
{
|
||||
return derived().template cast<bool>().template cast<Index>().sum();
|
||||
}
|
||||
|
||||
/** \returns true is \c *this contains at least one Not A Number (NaN).
|
||||
*
|
||||
* \sa allFinite()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline bool DenseBase<Derived>::hasNaN() const
|
||||
{
|
||||
#if EIGEN_COMP_MSVC || (defined __FAST_MATH__)
|
||||
return derived().array().isNaN().any();
|
||||
#else
|
||||
return !((derived().array()==derived().array()).all());
|
||||
#endif
|
||||
}
|
||||
|
||||
/** \returns true if \c *this contains only finite numbers, i.e., no NaN and no +/-INF values.
|
||||
*
|
||||
* \sa hasNaN()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline bool DenseBase<Derived>::allFinite() const
|
||||
{
|
||||
#if EIGEN_COMP_MSVC || (defined __FAST_MATH__)
|
||||
return derived().array().isFinite().all();
|
||||
#else
|
||||
return !((derived()-derived()).hasNaN());
|
||||
#endif
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_ALLANDANY_H
|
||||
160
external/include/eigen3/Eigen/src/Core/CommaInitializer.h
vendored
Normal file
160
external/include/eigen3/Eigen/src/Core/CommaInitializer.h
vendored
Normal file
@@ -0,0 +1,160 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_COMMAINITIALIZER_H
|
||||
#define EIGEN_COMMAINITIALIZER_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class CommaInitializer
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Helper class used by the comma initializer operator
|
||||
*
|
||||
* This class is internally used to implement the comma initializer feature. It is
|
||||
* the return type of MatrixBase::operator<<, and most of the time this is the only
|
||||
* way it is used.
|
||||
*
|
||||
* \sa \blank \ref MatrixBaseCommaInitRef "MatrixBase::operator<<", CommaInitializer::finished()
|
||||
*/
|
||||
template<typename XprType>
|
||||
struct CommaInitializer
|
||||
{
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CommaInitializer(XprType& xpr, const Scalar& s)
|
||||
: m_xpr(xpr), m_row(0), m_col(1), m_currentBlockRows(1)
|
||||
{
|
||||
m_xpr.coeffRef(0,0) = s;
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CommaInitializer(XprType& xpr, const DenseBase<OtherDerived>& other)
|
||||
: m_xpr(xpr), m_row(0), m_col(other.cols()), m_currentBlockRows(other.rows())
|
||||
{
|
||||
m_xpr.block(0, 0, other.rows(), other.cols()) = other;
|
||||
}
|
||||
|
||||
/* Copy/Move constructor which transfers ownership. This is crucial in
|
||||
* absence of return value optimization to avoid assertions during destruction. */
|
||||
// FIXME in C++11 mode this could be replaced by a proper RValue constructor
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CommaInitializer(const CommaInitializer& o)
|
||||
: m_xpr(o.m_xpr), m_row(o.m_row), m_col(o.m_col), m_currentBlockRows(o.m_currentBlockRows) {
|
||||
// Mark original object as finished. In absence of R-value references we need to const_cast:
|
||||
const_cast<CommaInitializer&>(o).m_row = m_xpr.rows();
|
||||
const_cast<CommaInitializer&>(o).m_col = m_xpr.cols();
|
||||
const_cast<CommaInitializer&>(o).m_currentBlockRows = 0;
|
||||
}
|
||||
|
||||
/* inserts a scalar value in the target matrix */
|
||||
EIGEN_DEVICE_FUNC
|
||||
CommaInitializer& operator,(const Scalar& s)
|
||||
{
|
||||
if (m_col==m_xpr.cols())
|
||||
{
|
||||
m_row+=m_currentBlockRows;
|
||||
m_col = 0;
|
||||
m_currentBlockRows = 1;
|
||||
eigen_assert(m_row<m_xpr.rows()
|
||||
&& "Too many rows passed to comma initializer (operator<<)");
|
||||
}
|
||||
eigen_assert(m_col<m_xpr.cols()
|
||||
&& "Too many coefficients passed to comma initializer (operator<<)");
|
||||
eigen_assert(m_currentBlockRows==1);
|
||||
m_xpr.coeffRef(m_row, m_col++) = s;
|
||||
return *this;
|
||||
}
|
||||
|
||||
/* inserts a matrix expression in the target matrix */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
CommaInitializer& operator,(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
if (m_col==m_xpr.cols() && (other.cols()!=0 || other.rows()!=m_currentBlockRows))
|
||||
{
|
||||
m_row+=m_currentBlockRows;
|
||||
m_col = 0;
|
||||
m_currentBlockRows = other.rows();
|
||||
eigen_assert(m_row+m_currentBlockRows<=m_xpr.rows()
|
||||
&& "Too many rows passed to comma initializer (operator<<)");
|
||||
}
|
||||
eigen_assert((m_col + other.cols() <= m_xpr.cols())
|
||||
&& "Too many coefficients passed to comma initializer (operator<<)");
|
||||
eigen_assert(m_currentBlockRows==other.rows());
|
||||
m_xpr.template block<OtherDerived::RowsAtCompileTime, OtherDerived::ColsAtCompileTime>
|
||||
(m_row, m_col, other.rows(), other.cols()) = other;
|
||||
m_col += other.cols();
|
||||
return *this;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ~CommaInitializer()
|
||||
#if defined VERIFY_RAISES_ASSERT && (!defined EIGEN_NO_ASSERTION_CHECKING) && defined EIGEN_EXCEPTIONS
|
||||
EIGEN_EXCEPTION_SPEC(Eigen::eigen_assert_exception)
|
||||
#endif
|
||||
{
|
||||
finished();
|
||||
}
|
||||
|
||||
/** \returns the built matrix once all its coefficients have been set.
|
||||
* Calling finished is 100% optional. Its purpose is to write expressions
|
||||
* like this:
|
||||
* \code
|
||||
* quaternion.fromRotationMatrix((Matrix3f() << axis0, axis1, axis2).finished());
|
||||
* \endcode
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline XprType& finished() {
|
||||
eigen_assert(((m_row+m_currentBlockRows) == m_xpr.rows() || m_xpr.cols() == 0)
|
||||
&& m_col == m_xpr.cols()
|
||||
&& "Too few coefficients passed to comma initializer (operator<<)");
|
||||
return m_xpr;
|
||||
}
|
||||
|
||||
XprType& m_xpr; // target expression
|
||||
Index m_row; // current row id
|
||||
Index m_col; // current col id
|
||||
Index m_currentBlockRows; // current block height
|
||||
};
|
||||
|
||||
/** \anchor MatrixBaseCommaInitRef
|
||||
* Convenient operator to set the coefficients of a matrix.
|
||||
*
|
||||
* The coefficients must be provided in a row major order and exactly match
|
||||
* the size of the matrix. Otherwise an assertion is raised.
|
||||
*
|
||||
* Example: \include MatrixBase_set.cpp
|
||||
* Output: \verbinclude MatrixBase_set.out
|
||||
*
|
||||
* \note According the c++ standard, the argument expressions of this comma initializer are evaluated in arbitrary order.
|
||||
*
|
||||
* \sa CommaInitializer::finished(), class CommaInitializer
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline CommaInitializer<Derived> DenseBase<Derived>::operator<< (const Scalar& s)
|
||||
{
|
||||
return CommaInitializer<Derived>(*static_cast<Derived*>(this), s);
|
||||
}
|
||||
|
||||
/** \sa operator<<(const Scalar&) */
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
inline CommaInitializer<Derived>
|
||||
DenseBase<Derived>::operator<<(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
return CommaInitializer<Derived>(*static_cast<Derived *>(this), other);
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_COMMAINITIALIZER_H
|
||||
175
external/include/eigen3/Eigen/src/Core/ConditionEstimator.h
vendored
Normal file
175
external/include/eigen3/Eigen/src/Core/ConditionEstimator.h
vendored
Normal file
@@ -0,0 +1,175 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2016 Rasmus Munk Larsen (rmlarsen@google.com)
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_CONDITIONESTIMATOR_H
|
||||
#define EIGEN_CONDITIONESTIMATOR_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template <typename Vector, typename RealVector, bool IsComplex>
|
||||
struct rcond_compute_sign {
|
||||
static inline Vector run(const Vector& v) {
|
||||
const RealVector v_abs = v.cwiseAbs();
|
||||
return (v_abs.array() == static_cast<typename Vector::RealScalar>(0))
|
||||
.select(Vector::Ones(v.size()), v.cwiseQuotient(v_abs));
|
||||
}
|
||||
};
|
||||
|
||||
// Partial specialization to avoid elementwise division for real vectors.
|
||||
template <typename Vector>
|
||||
struct rcond_compute_sign<Vector, Vector, false> {
|
||||
static inline Vector run(const Vector& v) {
|
||||
return (v.array() < static_cast<typename Vector::RealScalar>(0))
|
||||
.select(-Vector::Ones(v.size()), Vector::Ones(v.size()));
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* \returns an estimate of ||inv(matrix)||_1 given a decomposition of
|
||||
* \a matrix that implements .solve() and .adjoint().solve() methods.
|
||||
*
|
||||
* This function implements Algorithms 4.1 and 5.1 from
|
||||
* http://www.maths.manchester.ac.uk/~higham/narep/narep135.pdf
|
||||
* which also forms the basis for the condition number estimators in
|
||||
* LAPACK. Since at most 10 calls to the solve method of dec are
|
||||
* performed, the total cost is O(dims^2), as opposed to O(dims^3)
|
||||
* needed to compute the inverse matrix explicitly.
|
||||
*
|
||||
* The most common usage is in estimating the condition number
|
||||
* ||matrix||_1 * ||inv(matrix)||_1. The first term ||matrix||_1 can be
|
||||
* computed directly in O(n^2) operations.
|
||||
*
|
||||
* Supports the following decompositions: FullPivLU, PartialPivLU, LDLT, and
|
||||
* LLT.
|
||||
*
|
||||
* \sa FullPivLU, PartialPivLU, LDLT, LLT.
|
||||
*/
|
||||
template <typename Decomposition>
|
||||
typename Decomposition::RealScalar rcond_invmatrix_L1_norm_estimate(const Decomposition& dec)
|
||||
{
|
||||
typedef typename Decomposition::MatrixType MatrixType;
|
||||
typedef typename Decomposition::Scalar Scalar;
|
||||
typedef typename Decomposition::RealScalar RealScalar;
|
||||
typedef typename internal::plain_col_type<MatrixType>::type Vector;
|
||||
typedef typename internal::plain_col_type<MatrixType, RealScalar>::type RealVector;
|
||||
const bool is_complex = (NumTraits<Scalar>::IsComplex != 0);
|
||||
|
||||
eigen_assert(dec.rows() == dec.cols());
|
||||
const Index n = dec.rows();
|
||||
if (n == 0)
|
||||
return 0;
|
||||
|
||||
// Disable Index to float conversion warning
|
||||
#ifdef __INTEL_COMPILER
|
||||
#pragma warning push
|
||||
#pragma warning ( disable : 2259 )
|
||||
#endif
|
||||
Vector v = dec.solve(Vector::Ones(n) / Scalar(n));
|
||||
#ifdef __INTEL_COMPILER
|
||||
#pragma warning pop
|
||||
#endif
|
||||
|
||||
// lower_bound is a lower bound on
|
||||
// ||inv(matrix)||_1 = sup_v ||inv(matrix) v||_1 / ||v||_1
|
||||
// and is the objective maximized by the ("super-") gradient ascent
|
||||
// algorithm below.
|
||||
RealScalar lower_bound = v.template lpNorm<1>();
|
||||
if (n == 1)
|
||||
return lower_bound;
|
||||
|
||||
// Gradient ascent algorithm follows: We know that the optimum is achieved at
|
||||
// one of the simplices v = e_i, so in each iteration we follow a
|
||||
// super-gradient to move towards the optimal one.
|
||||
RealScalar old_lower_bound = lower_bound;
|
||||
Vector sign_vector(n);
|
||||
Vector old_sign_vector;
|
||||
Index v_max_abs_index = -1;
|
||||
Index old_v_max_abs_index = v_max_abs_index;
|
||||
for (int k = 0; k < 4; ++k)
|
||||
{
|
||||
sign_vector = internal::rcond_compute_sign<Vector, RealVector, is_complex>::run(v);
|
||||
if (k > 0 && !is_complex && sign_vector == old_sign_vector) {
|
||||
// Break if the solution stagnated.
|
||||
break;
|
||||
}
|
||||
// v_max_abs_index = argmax |real( inv(matrix)^T * sign_vector )|
|
||||
v = dec.adjoint().solve(sign_vector);
|
||||
v.real().cwiseAbs().maxCoeff(&v_max_abs_index);
|
||||
if (v_max_abs_index == old_v_max_abs_index) {
|
||||
// Break if the solution stagnated.
|
||||
break;
|
||||
}
|
||||
// Move to the new simplex e_j, where j = v_max_abs_index.
|
||||
v = dec.solve(Vector::Unit(n, v_max_abs_index)); // v = inv(matrix) * e_j.
|
||||
lower_bound = v.template lpNorm<1>();
|
||||
if (lower_bound <= old_lower_bound) {
|
||||
// Break if the gradient step did not increase the lower_bound.
|
||||
break;
|
||||
}
|
||||
if (!is_complex) {
|
||||
old_sign_vector = sign_vector;
|
||||
}
|
||||
old_v_max_abs_index = v_max_abs_index;
|
||||
old_lower_bound = lower_bound;
|
||||
}
|
||||
// The following calculates an independent estimate of ||matrix||_1 by
|
||||
// multiplying matrix by a vector with entries of slowly increasing
|
||||
// magnitude and alternating sign:
|
||||
// v_i = (-1)^{i} (1 + (i / (dim-1))), i = 0,...,dim-1.
|
||||
// This improvement to Hager's algorithm above is due to Higham. It was
|
||||
// added to make the algorithm more robust in certain corner cases where
|
||||
// large elements in the matrix might otherwise escape detection due to
|
||||
// exact cancellation (especially when op and op_adjoint correspond to a
|
||||
// sequence of backsubstitutions and permutations), which could cause
|
||||
// Hager's algorithm to vastly underestimate ||matrix||_1.
|
||||
Scalar alternating_sign(RealScalar(1));
|
||||
for (Index i = 0; i < n; ++i) {
|
||||
// The static_cast is needed when Scalar is a complex and RealScalar implements expression templates
|
||||
v[i] = alternating_sign * static_cast<RealScalar>(RealScalar(1) + (RealScalar(i) / (RealScalar(n - 1))));
|
||||
alternating_sign = -alternating_sign;
|
||||
}
|
||||
v = dec.solve(v);
|
||||
const RealScalar alternate_lower_bound = (2 * v.template lpNorm<1>()) / (3 * RealScalar(n));
|
||||
return numext::maxi(lower_bound, alternate_lower_bound);
|
||||
}
|
||||
|
||||
/** \brief Reciprocal condition number estimator.
|
||||
*
|
||||
* Computing a decomposition of a dense matrix takes O(n^3) operations, while
|
||||
* this method estimates the condition number quickly and reliably in O(n^2)
|
||||
* operations.
|
||||
*
|
||||
* \returns an estimate of the reciprocal condition number
|
||||
* (1 / (||matrix||_1 * ||inv(matrix)||_1)) of matrix, given ||matrix||_1 and
|
||||
* its decomposition. Supports the following decompositions: FullPivLU,
|
||||
* PartialPivLU, LDLT, and LLT.
|
||||
*
|
||||
* \sa FullPivLU, PartialPivLU, LDLT, LLT.
|
||||
*/
|
||||
template <typename Decomposition>
|
||||
typename Decomposition::RealScalar
|
||||
rcond_estimate_helper(typename Decomposition::RealScalar matrix_norm, const Decomposition& dec)
|
||||
{
|
||||
typedef typename Decomposition::RealScalar RealScalar;
|
||||
eigen_assert(dec.rows() == dec.cols());
|
||||
if (dec.rows() == 0) return NumTraits<RealScalar>::infinity();
|
||||
if (matrix_norm == RealScalar(0)) return RealScalar(0);
|
||||
if (dec.rows() == 1) return RealScalar(1);
|
||||
const RealScalar inverse_matrix_norm = rcond_invmatrix_L1_norm_estimate(dec);
|
||||
return (inverse_matrix_norm == RealScalar(0) ? RealScalar(0)
|
||||
: (RealScalar(1) / inverse_matrix_norm) / matrix_norm);
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
|
||||
} // namespace Eigen
|
||||
|
||||
#endif
|
||||
1688
external/include/eigen3/Eigen/src/Core/CoreEvaluators.h
vendored
Normal file
1688
external/include/eigen3/Eigen/src/Core/CoreEvaluators.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
127
external/include/eigen3/Eigen/src/Core/CoreIterators.h
vendored
Normal file
127
external/include/eigen3/Eigen/src/Core/CoreIterators.h
vendored
Normal file
@@ -0,0 +1,127 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_COREITERATORS_H
|
||||
#define EIGEN_COREITERATORS_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/* This file contains the respective InnerIterator definition of the expressions defined in Eigen/Core
|
||||
*/
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename XprType, typename EvaluatorKind>
|
||||
class inner_iterator_selector;
|
||||
|
||||
}
|
||||
|
||||
/** \class InnerIterator
|
||||
* \brief An InnerIterator allows to loop over the element of any matrix expression.
|
||||
*
|
||||
* \warning To be used with care because an evaluator is constructed every time an InnerIterator iterator is constructed.
|
||||
*
|
||||
* TODO: add a usage example
|
||||
*/
|
||||
template<typename XprType>
|
||||
class InnerIterator
|
||||
{
|
||||
protected:
|
||||
typedef internal::inner_iterator_selector<XprType, typename internal::evaluator_traits<XprType>::Kind> IteratorType;
|
||||
typedef internal::evaluator<XprType> EvaluatorType;
|
||||
typedef typename internal::traits<XprType>::Scalar Scalar;
|
||||
public:
|
||||
/** Construct an iterator over the \a outerId -th row or column of \a xpr */
|
||||
InnerIterator(const XprType &xpr, const Index &outerId)
|
||||
: m_eval(xpr), m_iter(m_eval, outerId, xpr.innerSize())
|
||||
{}
|
||||
|
||||
/// \returns the value of the current coefficient.
|
||||
EIGEN_STRONG_INLINE Scalar value() const { return m_iter.value(); }
|
||||
/** Increment the iterator \c *this to the next non-zero coefficient.
|
||||
* Explicit zeros are not skipped over. To skip explicit zeros, see class SparseView
|
||||
*/
|
||||
EIGEN_STRONG_INLINE InnerIterator& operator++() { m_iter.operator++(); return *this; }
|
||||
/// \returns the column or row index of the current coefficient.
|
||||
EIGEN_STRONG_INLINE Index index() const { return m_iter.index(); }
|
||||
/// \returns the row index of the current coefficient.
|
||||
EIGEN_STRONG_INLINE Index row() const { return m_iter.row(); }
|
||||
/// \returns the column index of the current coefficient.
|
||||
EIGEN_STRONG_INLINE Index col() const { return m_iter.col(); }
|
||||
/// \returns \c true if the iterator \c *this still references a valid coefficient.
|
||||
EIGEN_STRONG_INLINE operator bool() const { return m_iter; }
|
||||
|
||||
protected:
|
||||
EvaluatorType m_eval;
|
||||
IteratorType m_iter;
|
||||
private:
|
||||
// If you get here, then you're not using the right InnerIterator type, e.g.:
|
||||
// SparseMatrix<double,RowMajor> A;
|
||||
// SparseMatrix<double>::InnerIterator it(A,0);
|
||||
template<typename T> InnerIterator(const EigenBase<T>&,Index outer);
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
|
||||
// Generic inner iterator implementation for dense objects
|
||||
template<typename XprType>
|
||||
class inner_iterator_selector<XprType, IndexBased>
|
||||
{
|
||||
protected:
|
||||
typedef evaluator<XprType> EvaluatorType;
|
||||
typedef typename traits<XprType>::Scalar Scalar;
|
||||
enum { IsRowMajor = (XprType::Flags&RowMajorBit)==RowMajorBit };
|
||||
|
||||
public:
|
||||
EIGEN_STRONG_INLINE inner_iterator_selector(const EvaluatorType &eval, const Index &outerId, const Index &innerSize)
|
||||
: m_eval(eval), m_inner(0), m_outer(outerId), m_end(innerSize)
|
||||
{}
|
||||
|
||||
EIGEN_STRONG_INLINE Scalar value() const
|
||||
{
|
||||
return (IsRowMajor) ? m_eval.coeff(m_outer, m_inner)
|
||||
: m_eval.coeff(m_inner, m_outer);
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE inner_iterator_selector& operator++() { m_inner++; return *this; }
|
||||
|
||||
EIGEN_STRONG_INLINE Index index() const { return m_inner; }
|
||||
inline Index row() const { return IsRowMajor ? m_outer : index(); }
|
||||
inline Index col() const { return IsRowMajor ? index() : m_outer; }
|
||||
|
||||
EIGEN_STRONG_INLINE operator bool() const { return m_inner < m_end && m_inner>=0; }
|
||||
|
||||
protected:
|
||||
const EvaluatorType& m_eval;
|
||||
Index m_inner;
|
||||
const Index m_outer;
|
||||
const Index m_end;
|
||||
};
|
||||
|
||||
// For iterator-based evaluator, inner-iterator is already implemented as
|
||||
// evaluator<>::InnerIterator
|
||||
template<typename XprType>
|
||||
class inner_iterator_selector<XprType, IteratorBased>
|
||||
: public evaluator<XprType>::InnerIterator
|
||||
{
|
||||
protected:
|
||||
typedef typename evaluator<XprType>::InnerIterator Base;
|
||||
typedef evaluator<XprType> EvaluatorType;
|
||||
|
||||
public:
|
||||
EIGEN_STRONG_INLINE inner_iterator_selector(const EvaluatorType &eval, const Index &outerId, const Index &/*innerSize*/)
|
||||
: Base(eval, outerId)
|
||||
{}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_COREITERATORS_H
|
||||
184
external/include/eigen3/Eigen/src/Core/CwiseBinaryOp.h
vendored
Normal file
184
external/include/eigen3/Eigen/src/Core/CwiseBinaryOp.h
vendored
Normal file
@@ -0,0 +1,184 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_CWISE_BINARY_OP_H
|
||||
#define EIGEN_CWISE_BINARY_OP_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
template<typename BinaryOp, typename Lhs, typename Rhs>
|
||||
struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
|
||||
{
|
||||
// we must not inherit from traits<Lhs> since it has
|
||||
// the potential to cause problems with MSVC
|
||||
typedef typename remove_all<Lhs>::type Ancestor;
|
||||
typedef typename traits<Ancestor>::XprKind XprKind;
|
||||
enum {
|
||||
RowsAtCompileTime = traits<Ancestor>::RowsAtCompileTime,
|
||||
ColsAtCompileTime = traits<Ancestor>::ColsAtCompileTime,
|
||||
MaxRowsAtCompileTime = traits<Ancestor>::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = traits<Ancestor>::MaxColsAtCompileTime
|
||||
};
|
||||
|
||||
// even though we require Lhs and Rhs to have the same scalar type (see CwiseBinaryOp constructor),
|
||||
// we still want to handle the case when the result type is different.
|
||||
typedef typename result_of<
|
||||
BinaryOp(
|
||||
const typename Lhs::Scalar&,
|
||||
const typename Rhs::Scalar&
|
||||
)
|
||||
>::type Scalar;
|
||||
typedef typename cwise_promote_storage_type<typename traits<Lhs>::StorageKind,
|
||||
typename traits<Rhs>::StorageKind,
|
||||
BinaryOp>::ret StorageKind;
|
||||
typedef typename promote_index_type<typename traits<Lhs>::StorageIndex,
|
||||
typename traits<Rhs>::StorageIndex>::type StorageIndex;
|
||||
typedef typename Lhs::Nested LhsNested;
|
||||
typedef typename Rhs::Nested RhsNested;
|
||||
typedef typename remove_reference<LhsNested>::type _LhsNested;
|
||||
typedef typename remove_reference<RhsNested>::type _RhsNested;
|
||||
enum {
|
||||
Flags = cwise_promote_storage_order<typename traits<Lhs>::StorageKind,typename traits<Rhs>::StorageKind,_LhsNested::Flags & RowMajorBit,_RhsNested::Flags & RowMajorBit>::value
|
||||
};
|
||||
};
|
||||
} // end namespace internal
|
||||
|
||||
template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind>
|
||||
class CwiseBinaryOpImpl;
|
||||
|
||||
/** \class CwiseBinaryOp
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Generic expression where a coefficient-wise binary operator is applied to two expressions
|
||||
*
|
||||
* \tparam BinaryOp template functor implementing the operator
|
||||
* \tparam LhsType the type of the left-hand side
|
||||
* \tparam RhsType the type of the right-hand side
|
||||
*
|
||||
* This class represents an expression where a coefficient-wise binary operator is applied to two expressions.
|
||||
* It is the return type of binary operators, by which we mean only those binary operators where
|
||||
* both the left-hand side and the right-hand side are Eigen expressions.
|
||||
* For example, the return type of matrix1+matrix2 is a CwiseBinaryOp.
|
||||
*
|
||||
* Most of the time, this is the only way that it is used, so you typically don't have to name
|
||||
* CwiseBinaryOp types explicitly.
|
||||
*
|
||||
* \sa MatrixBase::binaryExpr(const MatrixBase<OtherDerived> &,const CustomBinaryOp &) const, class CwiseUnaryOp, class CwiseNullaryOp
|
||||
*/
|
||||
template<typename BinaryOp, typename LhsType, typename RhsType>
|
||||
class CwiseBinaryOp :
|
||||
public CwiseBinaryOpImpl<
|
||||
BinaryOp, LhsType, RhsType,
|
||||
typename internal::cwise_promote_storage_type<typename internal::traits<LhsType>::StorageKind,
|
||||
typename internal::traits<RhsType>::StorageKind,
|
||||
BinaryOp>::ret>,
|
||||
internal::no_assignment_operator
|
||||
{
|
||||
public:
|
||||
|
||||
typedef typename internal::remove_all<BinaryOp>::type Functor;
|
||||
typedef typename internal::remove_all<LhsType>::type Lhs;
|
||||
typedef typename internal::remove_all<RhsType>::type Rhs;
|
||||
|
||||
typedef typename CwiseBinaryOpImpl<
|
||||
BinaryOp, LhsType, RhsType,
|
||||
typename internal::cwise_promote_storage_type<typename internal::traits<LhsType>::StorageKind,
|
||||
typename internal::traits<Rhs>::StorageKind,
|
||||
BinaryOp>::ret>::Base Base;
|
||||
EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseBinaryOp)
|
||||
|
||||
typedef typename internal::ref_selector<LhsType>::type LhsNested;
|
||||
typedef typename internal::ref_selector<RhsType>::type RhsNested;
|
||||
typedef typename internal::remove_reference<LhsNested>::type _LhsNested;
|
||||
typedef typename internal::remove_reference<RhsNested>::type _RhsNested;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CwiseBinaryOp(const Lhs& aLhs, const Rhs& aRhs, const BinaryOp& func = BinaryOp())
|
||||
: m_lhs(aLhs), m_rhs(aRhs), m_functor(func)
|
||||
{
|
||||
EIGEN_CHECK_BINARY_COMPATIBILIY(BinaryOp,typename Lhs::Scalar,typename Rhs::Scalar);
|
||||
// require the sizes to match
|
||||
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs, Rhs)
|
||||
eigen_assert(aLhs.rows() == aRhs.rows() && aLhs.cols() == aRhs.cols());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index rows() const {
|
||||
// return the fixed size type if available to enable compile time optimizations
|
||||
if (internal::traits<typename internal::remove_all<LhsNested>::type>::RowsAtCompileTime==Dynamic)
|
||||
return m_rhs.rows();
|
||||
else
|
||||
return m_lhs.rows();
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index cols() const {
|
||||
// return the fixed size type if available to enable compile time optimizations
|
||||
if (internal::traits<typename internal::remove_all<LhsNested>::type>::ColsAtCompileTime==Dynamic)
|
||||
return m_rhs.cols();
|
||||
else
|
||||
return m_lhs.cols();
|
||||
}
|
||||
|
||||
/** \returns the left hand side nested expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const _LhsNested& lhs() const { return m_lhs; }
|
||||
/** \returns the right hand side nested expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const _RhsNested& rhs() const { return m_rhs; }
|
||||
/** \returns the functor representing the binary operation */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const BinaryOp& functor() const { return m_functor; }
|
||||
|
||||
protected:
|
||||
LhsNested m_lhs;
|
||||
RhsNested m_rhs;
|
||||
const BinaryOp m_functor;
|
||||
};
|
||||
|
||||
// Generic API dispatcher
|
||||
template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind>
|
||||
class CwiseBinaryOpImpl
|
||||
: public internal::generic_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type
|
||||
{
|
||||
public:
|
||||
typedef typename internal::generic_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type Base;
|
||||
};
|
||||
|
||||
/** replaces \c *this by \c *this - \a other.
|
||||
*
|
||||
* \returns a reference to \c *this
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE Derived &
|
||||
MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other)
|
||||
{
|
||||
call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** replaces \c *this by \c *this + \a other.
|
||||
*
|
||||
* \returns a reference to \c *this
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE Derived &
|
||||
MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other)
|
||||
{
|
||||
call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
return derived();
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_CWISE_BINARY_OP_H
|
||||
|
||||
866
external/include/eigen3/Eigen/src/Core/CwiseNullaryOp.h
vendored
Normal file
866
external/include/eigen3/Eigen/src/Core/CwiseNullaryOp.h
vendored
Normal file
@@ -0,0 +1,866 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_CWISE_NULLARY_OP_H
|
||||
#define EIGEN_CWISE_NULLARY_OP_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
template<typename NullaryOp, typename PlainObjectType>
|
||||
struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectType>
|
||||
{
|
||||
enum {
|
||||
Flags = traits<PlainObjectType>::Flags & RowMajorBit
|
||||
};
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
|
||||
/** \class CwiseNullaryOp
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Generic expression of a matrix where all coefficients are defined by a functor
|
||||
*
|
||||
* \tparam NullaryOp template functor implementing the operator
|
||||
* \tparam PlainObjectType the underlying plain matrix/array type
|
||||
*
|
||||
* This class represents an expression of a generic nullary operator.
|
||||
* It is the return type of the Ones(), Zero(), Constant(), Identity() and Random() methods,
|
||||
* and most of the time this is the only way it is used.
|
||||
*
|
||||
* However, if you want to write a function returning such an expression, you
|
||||
* will need to use this class.
|
||||
*
|
||||
* The functor NullaryOp must expose one of the following method:
|
||||
<table class="manual">
|
||||
<tr ><td>\c operator()() </td><td>if the procedural generation does not depend on the coefficient entries (e.g., random numbers)</td></tr>
|
||||
<tr class="alt"><td>\c operator()(Index i)</td><td>if the procedural generation makes sense for vectors only and that it depends on the coefficient index \c i (e.g., linspace) </td></tr>
|
||||
<tr ><td>\c operator()(Index i,Index j)</td><td>if the procedural generation depends on the matrix coordinates \c i, \c j (e.g., to generate a checkerboard with 0 and 1)</td></tr>
|
||||
</table>
|
||||
* It is also possible to expose the last two operators if the generation makes sense for matrices but can be optimized for vectors.
|
||||
*
|
||||
* See DenseBase::NullaryExpr(Index,const CustomNullaryOp&) for an example binding
|
||||
* C++11 random number generators.
|
||||
*
|
||||
* A nullary expression can also be used to implement custom sophisticated matrix manipulations
|
||||
* that cannot be covered by the existing set of natively supported matrix manipulations.
|
||||
* See this \ref TopicCustomizing_NullaryExpr "page" for some examples and additional explanations
|
||||
* on the behavior of CwiseNullaryOp.
|
||||
*
|
||||
* \sa class CwiseUnaryOp, class CwiseBinaryOp, DenseBase::NullaryExpr
|
||||
*/
|
||||
template<typename NullaryOp, typename PlainObjectType>
|
||||
class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp, PlainObjectType> >::type, internal::no_assignment_operator
|
||||
{
|
||||
public:
|
||||
|
||||
typedef typename internal::dense_xpr_base<CwiseNullaryOp>::type Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(CwiseNullaryOp)
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
CwiseNullaryOp(Index rows, Index cols, const NullaryOp& func = NullaryOp())
|
||||
: m_rows(rows), m_cols(cols), m_functor(func)
|
||||
{
|
||||
eigen_assert(rows >= 0
|
||||
&& (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
|
||||
&& cols >= 0
|
||||
&& (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index rows() const { return m_rows.value(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index cols() const { return m_cols.value(); }
|
||||
|
||||
/** \returns the functor representing the nullary operation */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const NullaryOp& functor() const { return m_functor; }
|
||||
|
||||
protected:
|
||||
const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
|
||||
const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
|
||||
const NullaryOp m_functor;
|
||||
};
|
||||
|
||||
|
||||
/** \returns an expression of a matrix defined by a custom functor \a func
|
||||
*
|
||||
* The parameters \a rows and \a cols are the number of rows and of columns of
|
||||
* the returned matrix. Must be compatible with this MatrixBase type.
|
||||
*
|
||||
* This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
|
||||
* it is redundant to pass \a rows and \a cols as arguments, so Zero() should be used
|
||||
* instead.
|
||||
*
|
||||
* The template parameter \a CustomNullaryOp is the type of the functor.
|
||||
*
|
||||
* \sa class CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename CustomNullaryOp>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
|
||||
DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func)
|
||||
{
|
||||
return CwiseNullaryOp<CustomNullaryOp, PlainObject>(rows, cols, func);
|
||||
}
|
||||
|
||||
/** \returns an expression of a matrix defined by a custom functor \a func
|
||||
*
|
||||
* The parameter \a size is the size of the returned vector.
|
||||
* Must be compatible with this MatrixBase type.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* This variant is meant to be used for dynamic-size vector types. For fixed-size types,
|
||||
* it is redundant to pass \a size as argument, so Zero() should be used
|
||||
* instead.
|
||||
*
|
||||
* The template parameter \a CustomNullaryOp is the type of the functor.
|
||||
*
|
||||
* Here is an example with C++11 random generators: \include random_cpp11.cpp
|
||||
* Output: \verbinclude random_cpp11.out
|
||||
*
|
||||
* \sa class CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename CustomNullaryOp>
|
||||
EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
|
||||
DenseBase<Derived>::NullaryExpr(Index size, const CustomNullaryOp& func)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
if(RowsAtCompileTime == 1) return CwiseNullaryOp<CustomNullaryOp, PlainObject>(1, size, func);
|
||||
else return CwiseNullaryOp<CustomNullaryOp, PlainObject>(size, 1, func);
|
||||
}
|
||||
|
||||
/** \returns an expression of a matrix defined by a custom functor \a func
|
||||
*
|
||||
* This variant is only for fixed-size DenseBase types. For dynamic-size types, you
|
||||
* need to use the variants taking size arguments.
|
||||
*
|
||||
* The template parameter \a CustomNullaryOp is the type of the functor.
|
||||
*
|
||||
* \sa class CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename CustomNullaryOp>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
|
||||
DenseBase<Derived>::NullaryExpr(const CustomNullaryOp& func)
|
||||
{
|
||||
return CwiseNullaryOp<CustomNullaryOp, PlainObject>(RowsAtCompileTime, ColsAtCompileTime, func);
|
||||
}
|
||||
|
||||
/** \returns an expression of a constant matrix of value \a value
|
||||
*
|
||||
* The parameters \a rows and \a cols are the number of rows and of columns of
|
||||
* the returned matrix. Must be compatible with this DenseBase type.
|
||||
*
|
||||
* This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
|
||||
* it is redundant to pass \a rows and \a cols as arguments, so Zero() should be used
|
||||
* instead.
|
||||
*
|
||||
* The template parameter \a CustomNullaryOp is the type of the functor.
|
||||
*
|
||||
* \sa class CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
DenseBase<Derived>::Constant(Index rows, Index cols, const Scalar& value)
|
||||
{
|
||||
return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_constant_op<Scalar>(value));
|
||||
}
|
||||
|
||||
/** \returns an expression of a constant matrix of value \a value
|
||||
*
|
||||
* The parameter \a size is the size of the returned vector.
|
||||
* Must be compatible with this DenseBase type.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* This variant is meant to be used for dynamic-size vector types. For fixed-size types,
|
||||
* it is redundant to pass \a size as argument, so Zero() should be used
|
||||
* instead.
|
||||
*
|
||||
* The template parameter \a CustomNullaryOp is the type of the functor.
|
||||
*
|
||||
* \sa class CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
DenseBase<Derived>::Constant(Index size, const Scalar& value)
|
||||
{
|
||||
return DenseBase<Derived>::NullaryExpr(size, internal::scalar_constant_op<Scalar>(value));
|
||||
}
|
||||
|
||||
/** \returns an expression of a constant matrix of value \a value
|
||||
*
|
||||
* This variant is only for fixed-size DenseBase types. For dynamic-size types, you
|
||||
* need to use the variants taking size arguments.
|
||||
*
|
||||
* The template parameter \a CustomNullaryOp is the type of the functor.
|
||||
*
|
||||
* \sa class CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
DenseBase<Derived>::Constant(const Scalar& value)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
|
||||
return DenseBase<Derived>::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_constant_op<Scalar>(value));
|
||||
}
|
||||
|
||||
/** \deprecated because of accuracy loss. In Eigen 3.3, it is an alias for LinSpaced(Index,const Scalar&,const Scalar&)
|
||||
*
|
||||
* \sa LinSpaced(Index,Scalar,Scalar), setLinSpaced(Index,const Scalar&,const Scalar&)
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
|
||||
DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar>(low,high,size));
|
||||
}
|
||||
|
||||
/** \deprecated because of accuracy loss. In Eigen 3.3, it is an alias for LinSpaced(const Scalar&,const Scalar&)
|
||||
*
|
||||
* \sa LinSpaced(Scalar,Scalar)
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
|
||||
DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
|
||||
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar>(low,high,Derived::SizeAtCompileTime));
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Sets a linearly spaced vector.
|
||||
*
|
||||
* The function generates 'size' equally spaced values in the closed interval [low,high].
|
||||
* When size is set to 1, a vector of length 1 containing 'high' is returned.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* Example: \include DenseBase_LinSpaced.cpp
|
||||
* Output: \verbinclude DenseBase_LinSpaced.out
|
||||
*
|
||||
* For integer scalar types, an even spacing is possible if and only if the length of the range,
|
||||
* i.e., \c high-low is a scalar multiple of \c size-1, or if \c size is a scalar multiple of the
|
||||
* number of values \c high-low+1 (meaning each value can be repeated the same number of time).
|
||||
* If one of these two considions is not satisfied, then \c high is lowered to the largest value
|
||||
* satisfying one of this constraint.
|
||||
* Here are some examples:
|
||||
*
|
||||
* Example: \include DenseBase_LinSpacedInt.cpp
|
||||
* Output: \verbinclude DenseBase_LinSpacedInt.out
|
||||
*
|
||||
* \sa setLinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
|
||||
DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar>(low,high,size));
|
||||
}
|
||||
|
||||
/**
|
||||
* \copydoc DenseBase::LinSpaced(Index, const Scalar&, const Scalar&)
|
||||
* Special version for fixed size types which does not require the size parameter.
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
|
||||
DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
|
||||
return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar>(low,high,Derived::SizeAtCompileTime));
|
||||
}
|
||||
|
||||
/** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isApproxToConstant
|
||||
(const Scalar& val, const RealScalar& prec) const
|
||||
{
|
||||
typename internal::nested_eval<Derived,1>::type self(derived());
|
||||
for(Index j = 0; j < cols(); ++j)
|
||||
for(Index i = 0; i < rows(); ++i)
|
||||
if(!internal::isApprox(self.coeff(i, j), val, prec))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/** This is just an alias for isApproxToConstant().
|
||||
*
|
||||
* \returns true if all coefficients in this matrix are approximately equal to \a value, to within precision \a prec */
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isConstant
|
||||
(const Scalar& val, const RealScalar& prec) const
|
||||
{
|
||||
return isApproxToConstant(val, prec);
|
||||
}
|
||||
|
||||
/** Alias for setConstant(): sets all coefficients in this expression to \a val.
|
||||
*
|
||||
* \sa setConstant(), Constant(), class CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val)
|
||||
{
|
||||
setConstant(val);
|
||||
}
|
||||
|
||||
/** Sets all coefficients in this expression to value \a val.
|
||||
*
|
||||
* \sa fill(), setConstant(Index,const Scalar&), setConstant(Index,Index,const Scalar&), setZero(), setOnes(), Constant(), class CwiseNullaryOp, setZero(), setOnes()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val)
|
||||
{
|
||||
return derived() = Constant(rows(), cols(), val);
|
||||
}
|
||||
|
||||
/** Resizes to the given \a size, and sets all coefficients in this expression to the given value \a val.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* Example: \include Matrix_setConstant_int.cpp
|
||||
* Output: \verbinclude Matrix_setConstant_int.out
|
||||
*
|
||||
* \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&)
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
|
||||
PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val)
|
||||
{
|
||||
resize(size);
|
||||
return setConstant(val);
|
||||
}
|
||||
|
||||
/** Resizes to the given size, and sets all coefficients in this expression to the given value \a val.
|
||||
*
|
||||
* \param rows the new number of rows
|
||||
* \param cols the new number of columns
|
||||
* \param val the value to which all coefficients are set
|
||||
*
|
||||
* Example: \include Matrix_setConstant_int_int.cpp
|
||||
* Output: \verbinclude Matrix_setConstant_int_int.out
|
||||
*
|
||||
* \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&)
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
|
||||
PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val)
|
||||
{
|
||||
resize(rows, cols);
|
||||
return setConstant(val);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Sets a linearly spaced vector.
|
||||
*
|
||||
* The function generates 'size' equally spaced values in the closed interval [low,high].
|
||||
* When size is set to 1, a vector of length 1 containing 'high' is returned.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* Example: \include DenseBase_setLinSpaced.cpp
|
||||
* Output: \verbinclude DenseBase_setLinSpaced.out
|
||||
*
|
||||
* For integer scalar types, do not miss the explanations on the definition
|
||||
* of \link LinSpaced(Index,const Scalar&,const Scalar&) even spacing \endlink.
|
||||
*
|
||||
* \sa LinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar,PacketScalar>(low,high,newSize));
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Sets a linearly spaced vector.
|
||||
*
|
||||
* The function fills \c *this with equally spaced values in the closed interval [low,high].
|
||||
* When size is set to 1, a vector of length 1 containing 'high' is returned.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* For integer scalar types, do not miss the explanations on the definition
|
||||
* of \link LinSpaced(Index,const Scalar&,const Scalar&) even spacing \endlink.
|
||||
*
|
||||
* \sa LinSpaced(Index,const Scalar&,const Scalar&), setLinSpaced(Index, const Scalar&, const Scalar&), CwiseNullaryOp
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, const Scalar& high)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return setLinSpaced(size(), low, high);
|
||||
}
|
||||
|
||||
// zero:
|
||||
|
||||
/** \returns an expression of a zero matrix.
|
||||
*
|
||||
* The parameters \a rows and \a cols are the number of rows and of columns of
|
||||
* the returned matrix. Must be compatible with this MatrixBase type.
|
||||
*
|
||||
* This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
|
||||
* it is redundant to pass \a rows and \a cols as arguments, so Zero() should be used
|
||||
* instead.
|
||||
*
|
||||
* Example: \include MatrixBase_zero_int_int.cpp
|
||||
* Output: \verbinclude MatrixBase_zero_int_int.out
|
||||
*
|
||||
* \sa Zero(), Zero(Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
DenseBase<Derived>::Zero(Index rows, Index cols)
|
||||
{
|
||||
return Constant(rows, cols, Scalar(0));
|
||||
}
|
||||
|
||||
/** \returns an expression of a zero vector.
|
||||
*
|
||||
* The parameter \a size is the size of the returned vector.
|
||||
* Must be compatible with this MatrixBase type.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* This variant is meant to be used for dynamic-size vector types. For fixed-size types,
|
||||
* it is redundant to pass \a size as argument, so Zero() should be used
|
||||
* instead.
|
||||
*
|
||||
* Example: \include MatrixBase_zero_int.cpp
|
||||
* Output: \verbinclude MatrixBase_zero_int.out
|
||||
*
|
||||
* \sa Zero(), Zero(Index,Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
DenseBase<Derived>::Zero(Index size)
|
||||
{
|
||||
return Constant(size, Scalar(0));
|
||||
}
|
||||
|
||||
/** \returns an expression of a fixed-size zero matrix or vector.
|
||||
*
|
||||
* This variant is only for fixed-size MatrixBase types. For dynamic-size types, you
|
||||
* need to use the variants taking size arguments.
|
||||
*
|
||||
* Example: \include MatrixBase_zero.cpp
|
||||
* Output: \verbinclude MatrixBase_zero.out
|
||||
*
|
||||
* \sa Zero(Index), Zero(Index,Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
DenseBase<Derived>::Zero()
|
||||
{
|
||||
return Constant(Scalar(0));
|
||||
}
|
||||
|
||||
/** \returns true if *this is approximately equal to the zero matrix,
|
||||
* within the precision given by \a prec.
|
||||
*
|
||||
* Example: \include MatrixBase_isZero.cpp
|
||||
* Output: \verbinclude MatrixBase_isZero.out
|
||||
*
|
||||
* \sa class CwiseNullaryOp, Zero()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isZero(const RealScalar& prec) const
|
||||
{
|
||||
typename internal::nested_eval<Derived,1>::type self(derived());
|
||||
for(Index j = 0; j < cols(); ++j)
|
||||
for(Index i = 0; i < rows(); ++i)
|
||||
if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast<Scalar>(1), prec))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Sets all coefficients in this expression to zero.
|
||||
*
|
||||
* Example: \include MatrixBase_setZero.cpp
|
||||
* Output: \verbinclude MatrixBase_setZero.out
|
||||
*
|
||||
* \sa class CwiseNullaryOp, Zero()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero()
|
||||
{
|
||||
return setConstant(Scalar(0));
|
||||
}
|
||||
|
||||
/** Resizes to the given \a size, and sets all coefficients in this expression to zero.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* Example: \include Matrix_setZero_int.cpp
|
||||
* Output: \verbinclude Matrix_setZero_int.out
|
||||
*
|
||||
* \sa DenseBase::setZero(), setZero(Index,Index), class CwiseNullaryOp, DenseBase::Zero()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
|
||||
PlainObjectBase<Derived>::setZero(Index newSize)
|
||||
{
|
||||
resize(newSize);
|
||||
return setConstant(Scalar(0));
|
||||
}
|
||||
|
||||
/** Resizes to the given size, and sets all coefficients in this expression to zero.
|
||||
*
|
||||
* \param rows the new number of rows
|
||||
* \param cols the new number of columns
|
||||
*
|
||||
* Example: \include Matrix_setZero_int_int.cpp
|
||||
* Output: \verbinclude Matrix_setZero_int_int.out
|
||||
*
|
||||
* \sa DenseBase::setZero(), setZero(Index), class CwiseNullaryOp, DenseBase::Zero()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
|
||||
PlainObjectBase<Derived>::setZero(Index rows, Index cols)
|
||||
{
|
||||
resize(rows, cols);
|
||||
return setConstant(Scalar(0));
|
||||
}
|
||||
|
||||
// ones:
|
||||
|
||||
/** \returns an expression of a matrix where all coefficients equal one.
|
||||
*
|
||||
* The parameters \a rows and \a cols are the number of rows and of columns of
|
||||
* the returned matrix. Must be compatible with this MatrixBase type.
|
||||
*
|
||||
* This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
|
||||
* it is redundant to pass \a rows and \a cols as arguments, so Ones() should be used
|
||||
* instead.
|
||||
*
|
||||
* Example: \include MatrixBase_ones_int_int.cpp
|
||||
* Output: \verbinclude MatrixBase_ones_int_int.out
|
||||
*
|
||||
* \sa Ones(), Ones(Index), isOnes(), class Ones
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
DenseBase<Derived>::Ones(Index rows, Index cols)
|
||||
{
|
||||
return Constant(rows, cols, Scalar(1));
|
||||
}
|
||||
|
||||
/** \returns an expression of a vector where all coefficients equal one.
|
||||
*
|
||||
* The parameter \a newSize is the size of the returned vector.
|
||||
* Must be compatible with this MatrixBase type.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* This variant is meant to be used for dynamic-size vector types. For fixed-size types,
|
||||
* it is redundant to pass \a size as argument, so Ones() should be used
|
||||
* instead.
|
||||
*
|
||||
* Example: \include MatrixBase_ones_int.cpp
|
||||
* Output: \verbinclude MatrixBase_ones_int.out
|
||||
*
|
||||
* \sa Ones(), Ones(Index,Index), isOnes(), class Ones
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
DenseBase<Derived>::Ones(Index newSize)
|
||||
{
|
||||
return Constant(newSize, Scalar(1));
|
||||
}
|
||||
|
||||
/** \returns an expression of a fixed-size matrix or vector where all coefficients equal one.
|
||||
*
|
||||
* This variant is only for fixed-size MatrixBase types. For dynamic-size types, you
|
||||
* need to use the variants taking size arguments.
|
||||
*
|
||||
* Example: \include MatrixBase_ones.cpp
|
||||
* Output: \verbinclude MatrixBase_ones.out
|
||||
*
|
||||
* \sa Ones(Index), Ones(Index,Index), isOnes(), class Ones
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
|
||||
DenseBase<Derived>::Ones()
|
||||
{
|
||||
return Constant(Scalar(1));
|
||||
}
|
||||
|
||||
/** \returns true if *this is approximately equal to the matrix where all coefficients
|
||||
* are equal to 1, within the precision given by \a prec.
|
||||
*
|
||||
* Example: \include MatrixBase_isOnes.cpp
|
||||
* Output: \verbinclude MatrixBase_isOnes.out
|
||||
*
|
||||
* \sa class CwiseNullaryOp, Ones()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isOnes
|
||||
(const RealScalar& prec) const
|
||||
{
|
||||
return isApproxToConstant(Scalar(1), prec);
|
||||
}
|
||||
|
||||
/** Sets all coefficients in this expression to one.
|
||||
*
|
||||
* Example: \include MatrixBase_setOnes.cpp
|
||||
* Output: \verbinclude MatrixBase_setOnes.out
|
||||
*
|
||||
* \sa class CwiseNullaryOp, Ones()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes()
|
||||
{
|
||||
return setConstant(Scalar(1));
|
||||
}
|
||||
|
||||
/** Resizes to the given \a newSize, and sets all coefficients in this expression to one.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* Example: \include Matrix_setOnes_int.cpp
|
||||
* Output: \verbinclude Matrix_setOnes_int.out
|
||||
*
|
||||
* \sa MatrixBase::setOnes(), setOnes(Index,Index), class CwiseNullaryOp, MatrixBase::Ones()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
|
||||
PlainObjectBase<Derived>::setOnes(Index newSize)
|
||||
{
|
||||
resize(newSize);
|
||||
return setConstant(Scalar(1));
|
||||
}
|
||||
|
||||
/** Resizes to the given size, and sets all coefficients in this expression to one.
|
||||
*
|
||||
* \param rows the new number of rows
|
||||
* \param cols the new number of columns
|
||||
*
|
||||
* Example: \include Matrix_setOnes_int_int.cpp
|
||||
* Output: \verbinclude Matrix_setOnes_int_int.out
|
||||
*
|
||||
* \sa MatrixBase::setOnes(), setOnes(Index), class CwiseNullaryOp, MatrixBase::Ones()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
|
||||
PlainObjectBase<Derived>::setOnes(Index rows, Index cols)
|
||||
{
|
||||
resize(rows, cols);
|
||||
return setConstant(Scalar(1));
|
||||
}
|
||||
|
||||
// Identity:
|
||||
|
||||
/** \returns an expression of the identity matrix (not necessarily square).
|
||||
*
|
||||
* The parameters \a rows and \a cols are the number of rows and of columns of
|
||||
* the returned matrix. Must be compatible with this MatrixBase type.
|
||||
*
|
||||
* This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
|
||||
* it is redundant to pass \a rows and \a cols as arguments, so Identity() should be used
|
||||
* instead.
|
||||
*
|
||||
* Example: \include MatrixBase_identity_int_int.cpp
|
||||
* Output: \verbinclude MatrixBase_identity_int_int.out
|
||||
*
|
||||
* \sa Identity(), setIdentity(), isIdentity()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
|
||||
MatrixBase<Derived>::Identity(Index rows, Index cols)
|
||||
{
|
||||
return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_identity_op<Scalar>());
|
||||
}
|
||||
|
||||
/** \returns an expression of the identity matrix (not necessarily square).
|
||||
*
|
||||
* This variant is only for fixed-size MatrixBase types. For dynamic-size types, you
|
||||
* need to use the variant taking size arguments.
|
||||
*
|
||||
* Example: \include MatrixBase_identity.cpp
|
||||
* Output: \verbinclude MatrixBase_identity.out
|
||||
*
|
||||
* \sa Identity(Index,Index), setIdentity(), isIdentity()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
|
||||
MatrixBase<Derived>::Identity()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
|
||||
return MatrixBase<Derived>::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_identity_op<Scalar>());
|
||||
}
|
||||
|
||||
/** \returns true if *this is approximately equal to the identity matrix
|
||||
* (not necessarily square),
|
||||
* within the precision given by \a prec.
|
||||
*
|
||||
* Example: \include MatrixBase_isIdentity.cpp
|
||||
* Output: \verbinclude MatrixBase_isIdentity.out
|
||||
*
|
||||
* \sa class CwiseNullaryOp, Identity(), Identity(Index,Index), setIdentity()
|
||||
*/
|
||||
template<typename Derived>
|
||||
bool MatrixBase<Derived>::isIdentity
|
||||
(const RealScalar& prec) const
|
||||
{
|
||||
typename internal::nested_eval<Derived,1>::type self(derived());
|
||||
for(Index j = 0; j < cols(); ++j)
|
||||
{
|
||||
for(Index i = 0; i < rows(); ++i)
|
||||
{
|
||||
if(i == j)
|
||||
{
|
||||
if(!internal::isApprox(self.coeff(i, j), static_cast<Scalar>(1), prec))
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast<RealScalar>(1), prec))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Derived, bool Big = (Derived::SizeAtCompileTime>=16)>
|
||||
struct setIdentity_impl
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Derived& run(Derived& m)
|
||||
{
|
||||
return m = Derived::Identity(m.rows(), m.cols());
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived>
|
||||
struct setIdentity_impl<Derived, true>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Derived& run(Derived& m)
|
||||
{
|
||||
m.setZero();
|
||||
const Index size = numext::mini(m.rows(), m.cols());
|
||||
for(Index i = 0; i < size; ++i) m.coeffRef(i,i) = typename Derived::Scalar(1);
|
||||
return m;
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** Writes the identity expression (not necessarily square) into *this.
|
||||
*
|
||||
* Example: \include MatrixBase_setIdentity.cpp
|
||||
* Output: \verbinclude MatrixBase_setIdentity.out
|
||||
*
|
||||
* \sa class CwiseNullaryOp, Identity(), Identity(Index,Index), isIdentity()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
|
||||
{
|
||||
return internal::setIdentity_impl<Derived>::run(derived());
|
||||
}
|
||||
|
||||
/** \brief Resizes to the given size, and writes the identity expression (not necessarily square) into *this.
|
||||
*
|
||||
* \param rows the new number of rows
|
||||
* \param cols the new number of columns
|
||||
*
|
||||
* Example: \include Matrix_setIdentity_int_int.cpp
|
||||
* Output: \verbinclude Matrix_setIdentity_int_int.out
|
||||
*
|
||||
* \sa MatrixBase::setIdentity(), class CwiseNullaryOp, MatrixBase::Identity()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index cols)
|
||||
{
|
||||
derived().resize(rows, cols);
|
||||
return setIdentity();
|
||||
}
|
||||
|
||||
/** \returns an expression of the i-th unit (basis) vector.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* \sa MatrixBase::Unit(Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index newSize, Index i)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return BasisReturnType(SquareMatrixType::Identity(newSize,newSize), i);
|
||||
}
|
||||
|
||||
/** \returns an expression of the i-th unit (basis) vector.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* This variant is for fixed-size vector only.
|
||||
*
|
||||
* \sa MatrixBase::Unit(Index,Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index i)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return BasisReturnType(SquareMatrixType::Identity(),i);
|
||||
}
|
||||
|
||||
/** \returns an expression of the X axis unit vector (1{,0}^*)
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitX()
|
||||
{ return Derived::Unit(0); }
|
||||
|
||||
/** \returns an expression of the Y axis unit vector (0,1{,0}^*)
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitY()
|
||||
{ return Derived::Unit(1); }
|
||||
|
||||
/** \returns an expression of the Z axis unit vector (0,0,1{,0}^*)
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitZ()
|
||||
{ return Derived::Unit(2); }
|
||||
|
||||
/** \returns an expression of the W axis unit vector (0,0,0,1)
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitW()
|
||||
{ return Derived::Unit(3); }
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_CWISE_NULLARY_OP_H
|
||||
197
external/include/eigen3/Eigen/src/Core/CwiseTernaryOp.h
vendored
Normal file
197
external/include/eigen3/Eigen/src/Core/CwiseTernaryOp.h
vendored
Normal file
@@ -0,0 +1,197 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2016 Eugene Brevdo <ebrevdo@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_CWISE_TERNARY_OP_H
|
||||
#define EIGEN_CWISE_TERNARY_OP_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
template <typename TernaryOp, typename Arg1, typename Arg2, typename Arg3>
|
||||
struct traits<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> > {
|
||||
// we must not inherit from traits<Arg1> since it has
|
||||
// the potential to cause problems with MSVC
|
||||
typedef typename remove_all<Arg1>::type Ancestor;
|
||||
typedef typename traits<Ancestor>::XprKind XprKind;
|
||||
enum {
|
||||
RowsAtCompileTime = traits<Ancestor>::RowsAtCompileTime,
|
||||
ColsAtCompileTime = traits<Ancestor>::ColsAtCompileTime,
|
||||
MaxRowsAtCompileTime = traits<Ancestor>::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = traits<Ancestor>::MaxColsAtCompileTime
|
||||
};
|
||||
|
||||
// even though we require Arg1, Arg2, and Arg3 to have the same scalar type
|
||||
// (see CwiseTernaryOp constructor),
|
||||
// we still want to handle the case when the result type is different.
|
||||
typedef typename result_of<TernaryOp(
|
||||
const typename Arg1::Scalar&, const typename Arg2::Scalar&,
|
||||
const typename Arg3::Scalar&)>::type Scalar;
|
||||
|
||||
typedef typename internal::traits<Arg1>::StorageKind StorageKind;
|
||||
typedef typename internal::traits<Arg1>::StorageIndex StorageIndex;
|
||||
|
||||
typedef typename Arg1::Nested Arg1Nested;
|
||||
typedef typename Arg2::Nested Arg2Nested;
|
||||
typedef typename Arg3::Nested Arg3Nested;
|
||||
typedef typename remove_reference<Arg1Nested>::type _Arg1Nested;
|
||||
typedef typename remove_reference<Arg2Nested>::type _Arg2Nested;
|
||||
typedef typename remove_reference<Arg3Nested>::type _Arg3Nested;
|
||||
enum { Flags = _Arg1Nested::Flags & RowMajorBit };
|
||||
};
|
||||
} // end namespace internal
|
||||
|
||||
template <typename TernaryOp, typename Arg1, typename Arg2, typename Arg3,
|
||||
typename StorageKind>
|
||||
class CwiseTernaryOpImpl;
|
||||
|
||||
/** \class CwiseTernaryOp
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Generic expression where a coefficient-wise ternary operator is
|
||||
* applied to two expressions
|
||||
*
|
||||
* \tparam TernaryOp template functor implementing the operator
|
||||
* \tparam Arg1Type the type of the first argument
|
||||
* \tparam Arg2Type the type of the second argument
|
||||
* \tparam Arg3Type the type of the third argument
|
||||
*
|
||||
* This class represents an expression where a coefficient-wise ternary
|
||||
* operator is applied to three expressions.
|
||||
* It is the return type of ternary operators, by which we mean only those
|
||||
* ternary operators where
|
||||
* all three arguments are Eigen expressions.
|
||||
* For example, the return type of betainc(matrix1, matrix2, matrix3) is a
|
||||
* CwiseTernaryOp.
|
||||
*
|
||||
* Most of the time, this is the only way that it is used, so you typically
|
||||
* don't have to name
|
||||
* CwiseTernaryOp types explicitly.
|
||||
*
|
||||
* \sa MatrixBase::ternaryExpr(const MatrixBase<Argument2> &, const
|
||||
* MatrixBase<Argument3> &, const CustomTernaryOp &) const, class CwiseBinaryOp,
|
||||
* class CwiseUnaryOp, class CwiseNullaryOp
|
||||
*/
|
||||
template <typename TernaryOp, typename Arg1Type, typename Arg2Type,
|
||||
typename Arg3Type>
|
||||
class CwiseTernaryOp : public CwiseTernaryOpImpl<
|
||||
TernaryOp, Arg1Type, Arg2Type, Arg3Type,
|
||||
typename internal::traits<Arg1Type>::StorageKind>,
|
||||
internal::no_assignment_operator
|
||||
{
|
||||
public:
|
||||
typedef typename internal::remove_all<Arg1Type>::type Arg1;
|
||||
typedef typename internal::remove_all<Arg2Type>::type Arg2;
|
||||
typedef typename internal::remove_all<Arg3Type>::type Arg3;
|
||||
|
||||
typedef typename CwiseTernaryOpImpl<
|
||||
TernaryOp, Arg1Type, Arg2Type, Arg3Type,
|
||||
typename internal::traits<Arg1Type>::StorageKind>::Base Base;
|
||||
EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseTernaryOp)
|
||||
|
||||
typedef typename internal::ref_selector<Arg1Type>::type Arg1Nested;
|
||||
typedef typename internal::ref_selector<Arg2Type>::type Arg2Nested;
|
||||
typedef typename internal::ref_selector<Arg3Type>::type Arg3Nested;
|
||||
typedef typename internal::remove_reference<Arg1Nested>::type _Arg1Nested;
|
||||
typedef typename internal::remove_reference<Arg2Nested>::type _Arg2Nested;
|
||||
typedef typename internal::remove_reference<Arg3Nested>::type _Arg3Nested;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CwiseTernaryOp(const Arg1& a1, const Arg2& a2,
|
||||
const Arg3& a3,
|
||||
const TernaryOp& func = TernaryOp())
|
||||
: m_arg1(a1), m_arg2(a2), m_arg3(a3), m_functor(func) {
|
||||
// require the sizes to match
|
||||
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg2)
|
||||
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg3)
|
||||
|
||||
// The index types should match
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<
|
||||
typename internal::traits<Arg1Type>::StorageKind,
|
||||
typename internal::traits<Arg2Type>::StorageKind>::value),
|
||||
STORAGE_KIND_MUST_MATCH)
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<
|
||||
typename internal::traits<Arg1Type>::StorageKind,
|
||||
typename internal::traits<Arg3Type>::StorageKind>::value),
|
||||
STORAGE_KIND_MUST_MATCH)
|
||||
|
||||
eigen_assert(a1.rows() == a2.rows() && a1.cols() == a2.cols() &&
|
||||
a1.rows() == a3.rows() && a1.cols() == a3.cols());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index rows() const {
|
||||
// return the fixed size type if available to enable compile time
|
||||
// optimizations
|
||||
if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::
|
||||
RowsAtCompileTime == Dynamic &&
|
||||
internal::traits<typename internal::remove_all<Arg2Nested>::type>::
|
||||
RowsAtCompileTime == Dynamic)
|
||||
return m_arg3.rows();
|
||||
else if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::
|
||||
RowsAtCompileTime == Dynamic &&
|
||||
internal::traits<typename internal::remove_all<Arg3Nested>::type>::
|
||||
RowsAtCompileTime == Dynamic)
|
||||
return m_arg2.rows();
|
||||
else
|
||||
return m_arg1.rows();
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index cols() const {
|
||||
// return the fixed size type if available to enable compile time
|
||||
// optimizations
|
||||
if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::
|
||||
ColsAtCompileTime == Dynamic &&
|
||||
internal::traits<typename internal::remove_all<Arg2Nested>::type>::
|
||||
ColsAtCompileTime == Dynamic)
|
||||
return m_arg3.cols();
|
||||
else if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::
|
||||
ColsAtCompileTime == Dynamic &&
|
||||
internal::traits<typename internal::remove_all<Arg3Nested>::type>::
|
||||
ColsAtCompileTime == Dynamic)
|
||||
return m_arg2.cols();
|
||||
else
|
||||
return m_arg1.cols();
|
||||
}
|
||||
|
||||
/** \returns the first argument nested expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const _Arg1Nested& arg1() const { return m_arg1; }
|
||||
/** \returns the first argument nested expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const _Arg2Nested& arg2() const { return m_arg2; }
|
||||
/** \returns the third argument nested expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const _Arg3Nested& arg3() const { return m_arg3; }
|
||||
/** \returns the functor representing the ternary operation */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const TernaryOp& functor() const { return m_functor; }
|
||||
|
||||
protected:
|
||||
Arg1Nested m_arg1;
|
||||
Arg2Nested m_arg2;
|
||||
Arg3Nested m_arg3;
|
||||
const TernaryOp m_functor;
|
||||
};
|
||||
|
||||
// Generic API dispatcher
|
||||
template <typename TernaryOp, typename Arg1, typename Arg2, typename Arg3,
|
||||
typename StorageKind>
|
||||
class CwiseTernaryOpImpl
|
||||
: public internal::generic_xpr_base<
|
||||
CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >::type {
|
||||
public:
|
||||
typedef typename internal::generic_xpr_base<
|
||||
CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >::type Base;
|
||||
};
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_CWISE_TERNARY_OP_H
|
||||
103
external/include/eigen3/Eigen/src/Core/CwiseUnaryOp.h
vendored
Normal file
103
external/include/eigen3/Eigen/src/Core/CwiseUnaryOp.h
vendored
Normal file
@@ -0,0 +1,103 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_CWISE_UNARY_OP_H
|
||||
#define EIGEN_CWISE_UNARY_OP_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
template<typename UnaryOp, typename XprType>
|
||||
struct traits<CwiseUnaryOp<UnaryOp, XprType> >
|
||||
: traits<XprType>
|
||||
{
|
||||
typedef typename result_of<
|
||||
UnaryOp(const typename XprType::Scalar&)
|
||||
>::type Scalar;
|
||||
typedef typename XprType::Nested XprTypeNested;
|
||||
typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
|
||||
enum {
|
||||
Flags = _XprTypeNested::Flags & RowMajorBit
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
template<typename UnaryOp, typename XprType, typename StorageKind>
|
||||
class CwiseUnaryOpImpl;
|
||||
|
||||
/** \class CwiseUnaryOp
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Generic expression where a coefficient-wise unary operator is applied to an expression
|
||||
*
|
||||
* \tparam UnaryOp template functor implementing the operator
|
||||
* \tparam XprType the type of the expression to which we are applying the unary operator
|
||||
*
|
||||
* This class represents an expression where a unary operator is applied to an expression.
|
||||
* It is the return type of all operations taking exactly 1 input expression, regardless of the
|
||||
* presence of other inputs such as scalars. For example, the operator* in the expression 3*matrix
|
||||
* is considered unary, because only the right-hand side is an expression, and its
|
||||
* return type is a specialization of CwiseUnaryOp.
|
||||
*
|
||||
* Most of the time, this is the only way that it is used, so you typically don't have to name
|
||||
* CwiseUnaryOp types explicitly.
|
||||
*
|
||||
* \sa MatrixBase::unaryExpr(const CustomUnaryOp &) const, class CwiseBinaryOp, class CwiseNullaryOp
|
||||
*/
|
||||
template<typename UnaryOp, typename XprType>
|
||||
class CwiseUnaryOp : public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal::traits<XprType>::StorageKind>, internal::no_assignment_operator
|
||||
{
|
||||
public:
|
||||
|
||||
typedef typename CwiseUnaryOpImpl<UnaryOp, XprType,typename internal::traits<XprType>::StorageKind>::Base Base;
|
||||
EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp)
|
||||
typedef typename internal::ref_selector<XprType>::type XprTypeNested;
|
||||
typedef typename internal::remove_all<XprType>::type NestedExpression;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
explicit CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
|
||||
: m_xpr(xpr), m_functor(func) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Index rows() const { return m_xpr.rows(); }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Index cols() const { return m_xpr.cols(); }
|
||||
|
||||
/** \returns the functor representing the unary operation */
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const UnaryOp& functor() const { return m_functor; }
|
||||
|
||||
/** \returns the nested expression */
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const typename internal::remove_all<XprTypeNested>::type&
|
||||
nestedExpression() const { return m_xpr; }
|
||||
|
||||
/** \returns the nested expression */
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
typename internal::remove_all<XprTypeNested>::type&
|
||||
nestedExpression() { return m_xpr; }
|
||||
|
||||
protected:
|
||||
XprTypeNested m_xpr;
|
||||
const UnaryOp m_functor;
|
||||
};
|
||||
|
||||
// Generic API dispatcher
|
||||
template<typename UnaryOp, typename XprType, typename StorageKind>
|
||||
class CwiseUnaryOpImpl
|
||||
: public internal::generic_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type
|
||||
{
|
||||
public:
|
||||
typedef typename internal::generic_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type Base;
|
||||
};
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_CWISE_UNARY_OP_H
|
||||
128
external/include/eigen3/Eigen/src/Core/CwiseUnaryView.h
vendored
Normal file
128
external/include/eigen3/Eigen/src/Core/CwiseUnaryView.h
vendored
Normal file
@@ -0,0 +1,128 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_CWISE_UNARY_VIEW_H
|
||||
#define EIGEN_CWISE_UNARY_VIEW_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
template<typename ViewOp, typename MatrixType>
|
||||
struct traits<CwiseUnaryView<ViewOp, MatrixType> >
|
||||
: traits<MatrixType>
|
||||
{
|
||||
typedef typename result_of<
|
||||
ViewOp(const typename traits<MatrixType>::Scalar&)
|
||||
>::type Scalar;
|
||||
typedef typename MatrixType::Nested MatrixTypeNested;
|
||||
typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested;
|
||||
enum {
|
||||
FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
|
||||
Flags = traits<_MatrixTypeNested>::Flags & (RowMajorBit | FlagsLvalueBit | DirectAccessBit), // FIXME DirectAccessBit should not be handled by expressions
|
||||
MatrixTypeInnerStride = inner_stride_at_compile_time<MatrixType>::ret,
|
||||
// need to cast the sizeof's from size_t to int explicitly, otherwise:
|
||||
// "error: no integral type can represent all of the enumerator values
|
||||
InnerStrideAtCompileTime = MatrixTypeInnerStride == Dynamic
|
||||
? int(Dynamic)
|
||||
: int(MatrixTypeInnerStride) * int(sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar)),
|
||||
OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret == Dynamic
|
||||
? int(Dynamic)
|
||||
: outer_stride_at_compile_time<MatrixType>::ret * int(sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar))
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
template<typename ViewOp, typename MatrixType, typename StorageKind>
|
||||
class CwiseUnaryViewImpl;
|
||||
|
||||
/** \class CwiseUnaryView
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Generic lvalue expression of a coefficient-wise unary operator of a matrix or a vector
|
||||
*
|
||||
* \tparam ViewOp template functor implementing the view
|
||||
* \tparam MatrixType the type of the matrix we are applying the unary operator
|
||||
*
|
||||
* This class represents a lvalue expression of a generic unary view operator of a matrix or a vector.
|
||||
* It is the return type of real() and imag(), and most of the time this is the only way it is used.
|
||||
*
|
||||
* \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp
|
||||
*/
|
||||
template<typename ViewOp, typename MatrixType>
|
||||
class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename internal::traits<MatrixType>::StorageKind>
|
||||
{
|
||||
public:
|
||||
|
||||
typedef typename CwiseUnaryViewImpl<ViewOp, MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
|
||||
EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView)
|
||||
typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;
|
||||
typedef typename internal::remove_all<MatrixType>::type NestedExpression;
|
||||
|
||||
explicit inline CwiseUnaryView(MatrixType& mat, const ViewOp& func = ViewOp())
|
||||
: m_matrix(mat), m_functor(func) {}
|
||||
|
||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryView)
|
||||
|
||||
EIGEN_STRONG_INLINE Index rows() const { return m_matrix.rows(); }
|
||||
EIGEN_STRONG_INLINE Index cols() const { return m_matrix.cols(); }
|
||||
|
||||
/** \returns the functor representing unary operation */
|
||||
const ViewOp& functor() const { return m_functor; }
|
||||
|
||||
/** \returns the nested expression */
|
||||
const typename internal::remove_all<MatrixTypeNested>::type&
|
||||
nestedExpression() const { return m_matrix; }
|
||||
|
||||
/** \returns the nested expression */
|
||||
typename internal::remove_reference<MatrixTypeNested>::type&
|
||||
nestedExpression() { return m_matrix.const_cast_derived(); }
|
||||
|
||||
protected:
|
||||
MatrixTypeNested m_matrix;
|
||||
ViewOp m_functor;
|
||||
};
|
||||
|
||||
// Generic API dispatcher
|
||||
template<typename ViewOp, typename XprType, typename StorageKind>
|
||||
class CwiseUnaryViewImpl
|
||||
: public internal::generic_xpr_base<CwiseUnaryView<ViewOp, XprType> >::type
|
||||
{
|
||||
public:
|
||||
typedef typename internal::generic_xpr_base<CwiseUnaryView<ViewOp, XprType> >::type Base;
|
||||
};
|
||||
|
||||
template<typename ViewOp, typename MatrixType>
|
||||
class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense>
|
||||
: public internal::dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType> >::type
|
||||
{
|
||||
public:
|
||||
|
||||
typedef CwiseUnaryView<ViewOp, MatrixType> Derived;
|
||||
typedef typename internal::dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType> >::type Base;
|
||||
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
|
||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl)
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Scalar* data() { return &(this->coeffRef(0)); }
|
||||
EIGEN_DEVICE_FUNC inline const Scalar* data() const { return &(this->coeff(0)); }
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Index innerStride() const
|
||||
{
|
||||
return derived().nestedExpression().innerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Index outerStride() const
|
||||
{
|
||||
return derived().nestedExpression().outerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_CWISE_UNARY_VIEW_H
|
||||
611
external/include/eigen3/Eigen/src/Core/DenseBase.h
vendored
Normal file
611
external/include/eigen3/Eigen/src/Core/DenseBase.h
vendored
Normal file
@@ -0,0 +1,611 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2007-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_DENSEBASE_H
|
||||
#define EIGEN_DENSEBASE_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
// The index type defined by EIGEN_DEFAULT_DENSE_INDEX_TYPE must be a signed type.
|
||||
// This dummy function simply aims at checking that at compile time.
|
||||
static inline void check_DenseIndex_is_signed() {
|
||||
EIGEN_STATIC_ASSERT(NumTraits<DenseIndex>::IsSigned,THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \class DenseBase
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Base class for all dense matrices, vectors, and arrays
|
||||
*
|
||||
* This class is the base that is inherited by all dense objects (matrix, vector, arrays,
|
||||
* and related expression types). The common Eigen API for dense objects is contained in this class.
|
||||
*
|
||||
* \tparam Derived is the derived type, e.g., a matrix type or an expression.
|
||||
*
|
||||
* This class can be extended with the help of the plugin mechanism described on the page
|
||||
* \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_DENSEBASE_PLUGIN.
|
||||
*
|
||||
* \sa \blank \ref TopicClassHierarchy
|
||||
*/
|
||||
template<typename Derived> class DenseBase
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
: public DenseCoeffsBase<Derived>
|
||||
#else
|
||||
: public DenseCoeffsBase<Derived,DirectWriteAccessors>
|
||||
#endif // not EIGEN_PARSED_BY_DOXYGEN
|
||||
{
|
||||
public:
|
||||
|
||||
/** Inner iterator type to iterate over the coefficients of a row or column.
|
||||
* \sa class InnerIterator
|
||||
*/
|
||||
typedef Eigen::InnerIterator<Derived> InnerIterator;
|
||||
|
||||
typedef typename internal::traits<Derived>::StorageKind StorageKind;
|
||||
|
||||
/**
|
||||
* \brief The type used to store indices
|
||||
* \details This typedef is relevant for types that store multiple indices such as
|
||||
* PermutationMatrix or Transpositions, otherwise it defaults to Eigen::Index
|
||||
* \sa \blank \ref TopicPreprocessorDirectives, Eigen::Index, SparseMatrixBase.
|
||||
*/
|
||||
typedef typename internal::traits<Derived>::StorageIndex StorageIndex;
|
||||
|
||||
/** The numeric type of the expression' coefficients, e.g. float, double, int or std::complex<float>, etc. */
|
||||
typedef typename internal::traits<Derived>::Scalar Scalar;
|
||||
|
||||
/** The numeric type of the expression' coefficients, e.g. float, double, int or std::complex<float>, etc.
|
||||
*
|
||||
* It is an alias for the Scalar type */
|
||||
typedef Scalar value_type;
|
||||
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
typedef DenseCoeffsBase<Derived> Base;
|
||||
|
||||
using Base::derived;
|
||||
using Base::const_cast_derived;
|
||||
using Base::rows;
|
||||
using Base::cols;
|
||||
using Base::size;
|
||||
using Base::rowIndexByOuterInner;
|
||||
using Base::colIndexByOuterInner;
|
||||
using Base::coeff;
|
||||
using Base::coeffByOuterInner;
|
||||
using Base::operator();
|
||||
using Base::operator[];
|
||||
using Base::x;
|
||||
using Base::y;
|
||||
using Base::z;
|
||||
using Base::w;
|
||||
using Base::stride;
|
||||
using Base::innerStride;
|
||||
using Base::outerStride;
|
||||
using Base::rowStride;
|
||||
using Base::colStride;
|
||||
typedef typename Base::CoeffReturnType CoeffReturnType;
|
||||
|
||||
enum {
|
||||
|
||||
RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
|
||||
/**< The number of rows at compile-time. This is just a copy of the value provided
|
||||
* by the \a Derived type. If a value is not known at compile-time,
|
||||
* it is set to the \a Dynamic constant.
|
||||
* \sa MatrixBase::rows(), MatrixBase::cols(), ColsAtCompileTime, SizeAtCompileTime */
|
||||
|
||||
ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
|
||||
/**< The number of columns at compile-time. This is just a copy of the value provided
|
||||
* by the \a Derived type. If a value is not known at compile-time,
|
||||
* it is set to the \a Dynamic constant.
|
||||
* \sa MatrixBase::rows(), MatrixBase::cols(), RowsAtCompileTime, SizeAtCompileTime */
|
||||
|
||||
|
||||
SizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::RowsAtCompileTime,
|
||||
internal::traits<Derived>::ColsAtCompileTime>::ret),
|
||||
/**< This is equal to the number of coefficients, i.e. the number of
|
||||
* rows times the number of columns, or to \a Dynamic if this is not
|
||||
* known at compile-time. \sa RowsAtCompileTime, ColsAtCompileTime */
|
||||
|
||||
MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,
|
||||
/**< This value is equal to the maximum possible number of rows that this expression
|
||||
* might have. If this expression might have an arbitrarily high number of rows,
|
||||
* this value is set to \a Dynamic.
|
||||
*
|
||||
* This value is useful to know when evaluating an expression, in order to determine
|
||||
* whether it is possible to avoid doing a dynamic memory allocation.
|
||||
*
|
||||
* \sa RowsAtCompileTime, MaxColsAtCompileTime, MaxSizeAtCompileTime
|
||||
*/
|
||||
|
||||
MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,
|
||||
/**< This value is equal to the maximum possible number of columns that this expression
|
||||
* might have. If this expression might have an arbitrarily high number of columns,
|
||||
* this value is set to \a Dynamic.
|
||||
*
|
||||
* This value is useful to know when evaluating an expression, in order to determine
|
||||
* whether it is possible to avoid doing a dynamic memory allocation.
|
||||
*
|
||||
* \sa ColsAtCompileTime, MaxRowsAtCompileTime, MaxSizeAtCompileTime
|
||||
*/
|
||||
|
||||
MaxSizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::MaxRowsAtCompileTime,
|
||||
internal::traits<Derived>::MaxColsAtCompileTime>::ret),
|
||||
/**< This value is equal to the maximum possible number of coefficients that this expression
|
||||
* might have. If this expression might have an arbitrarily high number of coefficients,
|
||||
* this value is set to \a Dynamic.
|
||||
*
|
||||
* This value is useful to know when evaluating an expression, in order to determine
|
||||
* whether it is possible to avoid doing a dynamic memory allocation.
|
||||
*
|
||||
* \sa SizeAtCompileTime, MaxRowsAtCompileTime, MaxColsAtCompileTime
|
||||
*/
|
||||
|
||||
IsVectorAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime == 1
|
||||
|| internal::traits<Derived>::MaxColsAtCompileTime == 1,
|
||||
/**< This is set to true if either the number of rows or the number of
|
||||
* columns is known at compile-time to be equal to 1. Indeed, in that case,
|
||||
* we are dealing with a column-vector (if there is only one column) or with
|
||||
* a row-vector (if there is only one row). */
|
||||
|
||||
Flags = internal::traits<Derived>::Flags,
|
||||
/**< This stores expression \ref flags flags which may or may not be inherited by new expressions
|
||||
* constructed from this one. See the \ref flags "list of flags".
|
||||
*/
|
||||
|
||||
IsRowMajor = int(Flags) & RowMajorBit, /**< True if this expression has row-major storage order. */
|
||||
|
||||
InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? int(SizeAtCompileTime)
|
||||
: int(IsRowMajor) ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
|
||||
|
||||
InnerStrideAtCompileTime = internal::inner_stride_at_compile_time<Derived>::ret,
|
||||
OuterStrideAtCompileTime = internal::outer_stride_at_compile_time<Derived>::ret
|
||||
};
|
||||
|
||||
typedef typename internal::find_best_packet<Scalar,SizeAtCompileTime>::type PacketScalar;
|
||||
|
||||
enum { IsPlainObjectBase = 0 };
|
||||
|
||||
/** The plain matrix type corresponding to this expression.
|
||||
* \sa PlainObject */
|
||||
typedef Matrix<typename internal::traits<Derived>::Scalar,
|
||||
internal::traits<Derived>::RowsAtCompileTime,
|
||||
internal::traits<Derived>::ColsAtCompileTime,
|
||||
AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
|
||||
internal::traits<Derived>::MaxRowsAtCompileTime,
|
||||
internal::traits<Derived>::MaxColsAtCompileTime
|
||||
> PlainMatrix;
|
||||
|
||||
/** The plain array type corresponding to this expression.
|
||||
* \sa PlainObject */
|
||||
typedef Array<typename internal::traits<Derived>::Scalar,
|
||||
internal::traits<Derived>::RowsAtCompileTime,
|
||||
internal::traits<Derived>::ColsAtCompileTime,
|
||||
AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
|
||||
internal::traits<Derived>::MaxRowsAtCompileTime,
|
||||
internal::traits<Derived>::MaxColsAtCompileTime
|
||||
> PlainArray;
|
||||
|
||||
/** \brief The plain matrix or array type corresponding to this expression.
|
||||
*
|
||||
* This is not necessarily exactly the return type of eval(). In the case of plain matrices,
|
||||
* the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed
|
||||
* that the return type of eval() is either PlainObject or const PlainObject&.
|
||||
*/
|
||||
typedef typename internal::conditional<internal::is_same<typename internal::traits<Derived>::XprKind,MatrixXpr >::value,
|
||||
PlainMatrix, PlainArray>::type PlainObject;
|
||||
|
||||
/** \returns the number of nonzero coefficients which is in practice the number
|
||||
* of stored coefficients. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index nonZeros() const { return size(); }
|
||||
|
||||
/** \returns the outer size.
|
||||
*
|
||||
* \note For a vector, this returns just 1. For a matrix (non-vector), this is the major dimension
|
||||
* with respect to the \ref TopicStorageOrders "storage order", i.e., the number of columns for a
|
||||
* column-major matrix, and the number of rows for a row-major matrix. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
Index outerSize() const
|
||||
{
|
||||
return IsVectorAtCompileTime ? 1
|
||||
: int(IsRowMajor) ? this->rows() : this->cols();
|
||||
}
|
||||
|
||||
/** \returns the inner size.
|
||||
*
|
||||
* \note For a vector, this is just the size. For a matrix (non-vector), this is the minor dimension
|
||||
* with respect to the \ref TopicStorageOrders "storage order", i.e., the number of rows for a
|
||||
* column-major matrix, and the number of columns for a row-major matrix. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
Index innerSize() const
|
||||
{
|
||||
return IsVectorAtCompileTime ? this->size()
|
||||
: int(IsRowMajor) ? this->cols() : this->rows();
|
||||
}
|
||||
|
||||
/** Only plain matrices/arrays, not expressions, may be resized; therefore the only useful resize methods are
|
||||
* Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does
|
||||
* nothing else.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
void resize(Index newSize)
|
||||
{
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(newSize);
|
||||
eigen_assert(newSize == this->size()
|
||||
&& "DenseBase::resize() does not actually allow to resize.");
|
||||
}
|
||||
/** Only plain matrices/arrays, not expressions, may be resized; therefore the only useful resize methods are
|
||||
* Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does
|
||||
* nothing else.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
void resize(Index rows, Index cols)
|
||||
{
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(rows);
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(cols);
|
||||
eigen_assert(rows == this->rows() && cols == this->cols()
|
||||
&& "DenseBase::resize() does not actually allow to resize.");
|
||||
}
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** \internal Represents a matrix with all coefficients equal to one another*/
|
||||
typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
|
||||
/** \internal \deprecated Represents a vector with linearly spaced coefficients that allows sequential access only. */
|
||||
typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar>,PlainObject> SequentialLinSpacedReturnType;
|
||||
/** \internal Represents a vector with linearly spaced coefficients that allows random access. */
|
||||
typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar>,PlainObject> RandomAccessLinSpacedReturnType;
|
||||
/** \internal the return type of MatrixBase::eigenvalues() */
|
||||
typedef Matrix<typename NumTraits<typename internal::traits<Derived>::Scalar>::Real, internal::traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;
|
||||
|
||||
#endif // not EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
/** Copies \a other into *this. \returns a reference to *this. */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator=(const DenseBase<OtherDerived>& other);
|
||||
|
||||
/** Special case of the template operator=, in order to prevent the compiler
|
||||
* from generating a default operator= (issue hit with g++ 4.1)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator=(const DenseBase& other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator=(const EigenBase<OtherDerived> &other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator+=(const EigenBase<OtherDerived> &other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator-=(const EigenBase<OtherDerived> &other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator=(const ReturnByValue<OtherDerived>& func);
|
||||
|
||||
/** \internal
|
||||
* Copies \a other into *this without evaluating other. \returns a reference to *this.
|
||||
* \deprecated */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& lazyAssign(const DenseBase<OtherDerived>& other);
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
CommaInitializer<Derived> operator<< (const Scalar& s);
|
||||
|
||||
/** \deprecated it now returns \c *this */
|
||||
template<unsigned int Added,unsigned int Removed>
|
||||
EIGEN_DEPRECATED
|
||||
const Derived& flagged() const
|
||||
{ return derived(); }
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
CommaInitializer<Derived> operator<< (const DenseBase<OtherDerived>& other);
|
||||
|
||||
typedef Transpose<Derived> TransposeReturnType;
|
||||
EIGEN_DEVICE_FUNC
|
||||
TransposeReturnType transpose();
|
||||
typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;
|
||||
EIGEN_DEVICE_FUNC
|
||||
ConstTransposeReturnType transpose() const;
|
||||
EIGEN_DEVICE_FUNC
|
||||
void transposeInPlace();
|
||||
|
||||
EIGEN_DEVICE_FUNC static const ConstantReturnType
|
||||
Constant(Index rows, Index cols, const Scalar& value);
|
||||
EIGEN_DEVICE_FUNC static const ConstantReturnType
|
||||
Constant(Index size, const Scalar& value);
|
||||
EIGEN_DEVICE_FUNC static const ConstantReturnType
|
||||
Constant(const Scalar& value);
|
||||
|
||||
EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType
|
||||
LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high);
|
||||
EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType
|
||||
LinSpaced(Index size, const Scalar& low, const Scalar& high);
|
||||
EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType
|
||||
LinSpaced(Sequential_t, const Scalar& low, const Scalar& high);
|
||||
EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType
|
||||
LinSpaced(const Scalar& low, const Scalar& high);
|
||||
|
||||
template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
|
||||
static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
|
||||
NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func);
|
||||
template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
|
||||
static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
|
||||
NullaryExpr(Index size, const CustomNullaryOp& func);
|
||||
template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
|
||||
static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
|
||||
NullaryExpr(const CustomNullaryOp& func);
|
||||
|
||||
EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index rows, Index cols);
|
||||
EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index size);
|
||||
EIGEN_DEVICE_FUNC static const ConstantReturnType Zero();
|
||||
EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index rows, Index cols);
|
||||
EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index size);
|
||||
EIGEN_DEVICE_FUNC static const ConstantReturnType Ones();
|
||||
|
||||
EIGEN_DEVICE_FUNC void fill(const Scalar& value);
|
||||
EIGEN_DEVICE_FUNC Derived& setConstant(const Scalar& value);
|
||||
EIGEN_DEVICE_FUNC Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high);
|
||||
EIGEN_DEVICE_FUNC Derived& setLinSpaced(const Scalar& low, const Scalar& high);
|
||||
EIGEN_DEVICE_FUNC Derived& setZero();
|
||||
EIGEN_DEVICE_FUNC Derived& setOnes();
|
||||
EIGEN_DEVICE_FUNC Derived& setRandom();
|
||||
|
||||
template<typename OtherDerived> EIGEN_DEVICE_FUNC
|
||||
bool isApprox(const DenseBase<OtherDerived>& other,
|
||||
const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
EIGEN_DEVICE_FUNC
|
||||
bool isMuchSmallerThan(const RealScalar& other,
|
||||
const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
template<typename OtherDerived> EIGEN_DEVICE_FUNC
|
||||
bool isMuchSmallerThan(const DenseBase<OtherDerived>& other,
|
||||
const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
|
||||
EIGEN_DEVICE_FUNC bool isApproxToConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
EIGEN_DEVICE_FUNC bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
EIGEN_DEVICE_FUNC bool isZero(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
EIGEN_DEVICE_FUNC bool isOnes(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
|
||||
inline bool hasNaN() const;
|
||||
inline bool allFinite() const;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator*=(const Scalar& other);
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator/=(const Scalar& other);
|
||||
|
||||
typedef typename internal::add_const_on_value_type<typename internal::eval<Derived>::type>::type EvalReturnType;
|
||||
/** \returns the matrix or vector obtained by evaluating this expression.
|
||||
*
|
||||
* Notice that in the case of a plain matrix or vector (not an expression) this function just returns
|
||||
* a const reference, in order to avoid a useless copy.
|
||||
*
|
||||
* \warning Be carefull with eval() and the auto C++ keyword, as detailed in this \link TopicPitfalls_auto_keyword page \endlink.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE EvalReturnType eval() const
|
||||
{
|
||||
// Even though MSVC does not honor strong inlining when the return type
|
||||
// is a dynamic matrix, we desperately need strong inlining for fixed
|
||||
// size types on MSVC.
|
||||
return typename internal::eval<Derived>::type(derived());
|
||||
}
|
||||
|
||||
/** swaps *this with the expression \a other.
|
||||
*
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void swap(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(!OtherDerived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);
|
||||
eigen_assert(rows()==other.rows() && cols()==other.cols());
|
||||
call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op<Scalar>());
|
||||
}
|
||||
|
||||
/** swaps *this with the matrix or array \a other.
|
||||
*
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void swap(PlainObjectBase<OtherDerived>& other)
|
||||
{
|
||||
eigen_assert(rows()==other.rows() && cols()==other.cols());
|
||||
call_assignment(derived(), other.derived(), internal::swap_assign_op<Scalar>());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline const NestByValue<Derived> nestByValue() const;
|
||||
EIGEN_DEVICE_FUNC inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
|
||||
EIGEN_DEVICE_FUNC inline ForceAlignedAccess<Derived> forceAlignedAccess();
|
||||
template<bool Enable> EIGEN_DEVICE_FUNC
|
||||
inline const typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf() const;
|
||||
template<bool Enable> EIGEN_DEVICE_FUNC
|
||||
inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf();
|
||||
|
||||
EIGEN_DEVICE_FUNC Scalar sum() const;
|
||||
EIGEN_DEVICE_FUNC Scalar mean() const;
|
||||
EIGEN_DEVICE_FUNC Scalar trace() const;
|
||||
|
||||
EIGEN_DEVICE_FUNC Scalar prod() const;
|
||||
|
||||
EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar minCoeff() const;
|
||||
EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar maxCoeff() const;
|
||||
|
||||
template<typename IndexType> EIGEN_DEVICE_FUNC
|
||||
typename internal::traits<Derived>::Scalar minCoeff(IndexType* row, IndexType* col) const;
|
||||
template<typename IndexType> EIGEN_DEVICE_FUNC
|
||||
typename internal::traits<Derived>::Scalar maxCoeff(IndexType* row, IndexType* col) const;
|
||||
template<typename IndexType> EIGEN_DEVICE_FUNC
|
||||
typename internal::traits<Derived>::Scalar minCoeff(IndexType* index) const;
|
||||
template<typename IndexType> EIGEN_DEVICE_FUNC
|
||||
typename internal::traits<Derived>::Scalar maxCoeff(IndexType* index) const;
|
||||
|
||||
template<typename BinaryOp>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Scalar redux(const BinaryOp& func) const;
|
||||
|
||||
template<typename Visitor>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void visit(Visitor& func) const;
|
||||
|
||||
/** \returns a WithFormat proxy object allowing to print a matrix the with given
|
||||
* format \a fmt.
|
||||
*
|
||||
* See class IOFormat for some examples.
|
||||
*
|
||||
* \sa class IOFormat, class WithFormat
|
||||
*/
|
||||
inline const WithFormat<Derived> format(const IOFormat& fmt) const
|
||||
{
|
||||
return WithFormat<Derived>(derived(), fmt);
|
||||
}
|
||||
|
||||
/** \returns the unique coefficient of a 1x1 expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
CoeffReturnType value() const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
|
||||
eigen_assert(this->rows() == 1 && this->cols() == 1);
|
||||
return derived().coeff(0,0);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC bool all() const;
|
||||
EIGEN_DEVICE_FUNC bool any() const;
|
||||
EIGEN_DEVICE_FUNC Index count() const;
|
||||
|
||||
typedef VectorwiseOp<Derived, Horizontal> RowwiseReturnType;
|
||||
typedef const VectorwiseOp<const Derived, Horizontal> ConstRowwiseReturnType;
|
||||
typedef VectorwiseOp<Derived, Vertical> ColwiseReturnType;
|
||||
typedef const VectorwiseOp<const Derived, Vertical> ConstColwiseReturnType;
|
||||
|
||||
/** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations
|
||||
*
|
||||
* Example: \include MatrixBase_rowwise.cpp
|
||||
* Output: \verbinclude MatrixBase_rowwise.out
|
||||
*
|
||||
* \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
|
||||
*/
|
||||
//Code moved here due to a CUDA compiler bug
|
||||
EIGEN_DEVICE_FUNC inline ConstRowwiseReturnType rowwise() const {
|
||||
return ConstRowwiseReturnType(derived());
|
||||
}
|
||||
EIGEN_DEVICE_FUNC RowwiseReturnType rowwise();
|
||||
|
||||
/** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations
|
||||
*
|
||||
* Example: \include MatrixBase_colwise.cpp
|
||||
* Output: \verbinclude MatrixBase_colwise.out
|
||||
*
|
||||
* \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC inline ConstColwiseReturnType colwise() const {
|
||||
return ConstColwiseReturnType(derived());
|
||||
}
|
||||
EIGEN_DEVICE_FUNC ColwiseReturnType colwise();
|
||||
|
||||
typedef CwiseNullaryOp<internal::scalar_random_op<Scalar>,PlainObject> RandomReturnType;
|
||||
static const RandomReturnType Random(Index rows, Index cols);
|
||||
static const RandomReturnType Random(Index size);
|
||||
static const RandomReturnType Random();
|
||||
|
||||
template<typename ThenDerived,typename ElseDerived>
|
||||
const Select<Derived,ThenDerived,ElseDerived>
|
||||
select(const DenseBase<ThenDerived>& thenMatrix,
|
||||
const DenseBase<ElseDerived>& elseMatrix) const;
|
||||
|
||||
template<typename ThenDerived>
|
||||
inline const Select<Derived,ThenDerived, typename ThenDerived::ConstantReturnType>
|
||||
select(const DenseBase<ThenDerived>& thenMatrix, const typename ThenDerived::Scalar& elseScalar) const;
|
||||
|
||||
template<typename ElseDerived>
|
||||
inline const Select<Derived, typename ElseDerived::ConstantReturnType, ElseDerived >
|
||||
select(const typename ElseDerived::Scalar& thenScalar, const DenseBase<ElseDerived>& elseMatrix) const;
|
||||
|
||||
template<int p> RealScalar lpNorm() const;
|
||||
|
||||
template<int RowFactor, int ColFactor>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Replicate<Derived,RowFactor,ColFactor> replicate() const;
|
||||
/**
|
||||
* \return an expression of the replication of \c *this
|
||||
*
|
||||
* Example: \include MatrixBase_replicate_int_int.cpp
|
||||
* Output: \verbinclude MatrixBase_replicate_int_int.out
|
||||
*
|
||||
* \sa VectorwiseOp::replicate(), DenseBase::replicate<int,int>(), class Replicate
|
||||
*/
|
||||
//Code moved here due to a CUDA compiler bug
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Replicate<Derived, Dynamic, Dynamic> replicate(Index rowFactor, Index colFactor) const
|
||||
{
|
||||
return Replicate<Derived, Dynamic, Dynamic>(derived(), rowFactor, colFactor);
|
||||
}
|
||||
|
||||
typedef Reverse<Derived, BothDirections> ReverseReturnType;
|
||||
typedef const Reverse<const Derived, BothDirections> ConstReverseReturnType;
|
||||
EIGEN_DEVICE_FUNC ReverseReturnType reverse();
|
||||
/** This is the const version of reverse(). */
|
||||
//Code moved here due to a CUDA compiler bug
|
||||
EIGEN_DEVICE_FUNC ConstReverseReturnType reverse() const
|
||||
{
|
||||
return ConstReverseReturnType(derived());
|
||||
}
|
||||
EIGEN_DEVICE_FUNC void reverseInPlace();
|
||||
|
||||
#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase
|
||||
#define EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
|
||||
#define EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(COND)
|
||||
# include "../plugins/BlockMethods.h"
|
||||
# ifdef EIGEN_DENSEBASE_PLUGIN
|
||||
# include EIGEN_DENSEBASE_PLUGIN
|
||||
# endif
|
||||
#undef EIGEN_CURRENT_STORAGE_BASE_CLASS
|
||||
#undef EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
|
||||
#undef EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF
|
||||
|
||||
// disable the use of evalTo for dense objects with a nice compilation error
|
||||
template<typename Dest>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void evalTo(Dest& ) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<Dest,void>::value),THE_EVAL_EVALTO_FUNCTION_SHOULD_NEVER_BE_CALLED_FOR_DENSE_OBJECTS);
|
||||
}
|
||||
|
||||
protected:
|
||||
/** Default constructor. Do nothing. */
|
||||
EIGEN_DEVICE_FUNC DenseBase()
|
||||
{
|
||||
/* Just checks for self-consistency of the flags.
|
||||
* Only do it when debugging Eigen, as this borders on paranoiac and could slow compilation down
|
||||
*/
|
||||
#ifdef EIGEN_INTERNAL_DEBUGGING
|
||||
EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, int(IsRowMajor))
|
||||
&& EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, int(!IsRowMajor))),
|
||||
INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION)
|
||||
#endif
|
||||
}
|
||||
|
||||
private:
|
||||
EIGEN_DEVICE_FUNC explicit DenseBase(int);
|
||||
EIGEN_DEVICE_FUNC DenseBase(int,int);
|
||||
template<typename OtherDerived> EIGEN_DEVICE_FUNC explicit DenseBase(const DenseBase<OtherDerived>&);
|
||||
};
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_DENSEBASE_H
|
||||
681
external/include/eigen3/Eigen/src/Core/DenseCoeffsBase.h
vendored
Normal file
681
external/include/eigen3/Eigen/src/Core/DenseCoeffsBase.h
vendored
Normal file
@@ -0,0 +1,681 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_DENSECOEFFSBASE_H
|
||||
#define EIGEN_DENSECOEFFSBASE_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
template<typename T> struct add_const_on_value_type_if_arithmetic
|
||||
{
|
||||
typedef typename conditional<is_arithmetic<T>::value, T, typename add_const_on_value_type<T>::type>::type type;
|
||||
};
|
||||
}
|
||||
|
||||
/** \brief Base class providing read-only coefficient access to matrices and arrays.
|
||||
* \ingroup Core_Module
|
||||
* \tparam Derived Type of the derived class
|
||||
* \tparam #ReadOnlyAccessors Constant indicating read-only access
|
||||
*
|
||||
* This class defines the \c operator() \c const function and friends, which can be used to read specific
|
||||
* entries of a matrix or array.
|
||||
*
|
||||
* \sa DenseCoeffsBase<Derived, WriteAccessors>, DenseCoeffsBase<Derived, DirectAccessors>,
|
||||
* \ref TopicClassHierarchy
|
||||
*/
|
||||
template<typename Derived>
|
||||
class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
|
||||
{
|
||||
public:
|
||||
typedef typename internal::traits<Derived>::StorageKind StorageKind;
|
||||
typedef typename internal::traits<Derived>::Scalar Scalar;
|
||||
typedef typename internal::packet_traits<Scalar>::type PacketScalar;
|
||||
|
||||
// Explanation for this CoeffReturnType typedef.
|
||||
// - This is the return type of the coeff() method.
|
||||
// - The LvalueBit means exactly that we can offer a coeffRef() method, which means exactly that we can get references
|
||||
// to coeffs, which means exactly that we can have coeff() return a const reference (as opposed to returning a value).
|
||||
// - The is_artihmetic check is required since "const int", "const double", etc. will cause warnings on some systems
|
||||
// while the declaration of "const T", where T is a non arithmetic type does not. Always returning "const Scalar&" is
|
||||
// not possible, since the underlying expressions might not offer a valid address the reference could be referring to.
|
||||
typedef typename internal::conditional<bool(internal::traits<Derived>::Flags&LvalueBit),
|
||||
const Scalar&,
|
||||
typename internal::conditional<internal::is_arithmetic<Scalar>::value, Scalar, const Scalar>::type
|
||||
>::type CoeffReturnType;
|
||||
|
||||
typedef typename internal::add_const_on_value_type_if_arithmetic<
|
||||
typename internal::packet_traits<Scalar>::type
|
||||
>::type PacketReturnType;
|
||||
|
||||
typedef EigenBase<Derived> Base;
|
||||
using Base::rows;
|
||||
using Base::cols;
|
||||
using Base::size;
|
||||
using Base::derived;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) const
|
||||
{
|
||||
return int(Derived::RowsAtCompileTime) == 1 ? 0
|
||||
: int(Derived::ColsAtCompileTime) == 1 ? inner
|
||||
: int(Derived::Flags)&RowMajorBit ? outer
|
||||
: inner;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) const
|
||||
{
|
||||
return int(Derived::ColsAtCompileTime) == 1 ? 0
|
||||
: int(Derived::RowsAtCompileTime) == 1 ? inner
|
||||
: int(Derived::Flags)&RowMajorBit ? inner
|
||||
: outer;
|
||||
}
|
||||
|
||||
/** Short version: don't use this function, use
|
||||
* \link operator()(Index,Index) const \endlink instead.
|
||||
*
|
||||
* Long version: this function is similar to
|
||||
* \link operator()(Index,Index) const \endlink, but without the assertion.
|
||||
* Use this for limiting the performance cost of debugging code when doing
|
||||
* repeated coefficient access. Only use this when it is guaranteed that the
|
||||
* parameters \a row and \a col are in range.
|
||||
*
|
||||
* If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this
|
||||
* function equivalent to \link operator()(Index,Index) const \endlink.
|
||||
*
|
||||
* \sa operator()(Index,Index) const, coeffRef(Index,Index), coeff(Index) const
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
eigen_internal_assert(row >= 0 && row < rows()
|
||||
&& col >= 0 && col < cols());
|
||||
return internal::evaluator<Derived>(derived()).coeff(row,col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
|
||||
{
|
||||
return coeff(rowIndexByOuterInner(outer, inner),
|
||||
colIndexByOuterInner(outer, inner));
|
||||
}
|
||||
|
||||
/** \returns the coefficient at given the given row and column.
|
||||
*
|
||||
* \sa operator()(Index,Index), operator[](Index)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType operator()(Index row, Index col) const
|
||||
{
|
||||
eigen_assert(row >= 0 && row < rows()
|
||||
&& col >= 0 && col < cols());
|
||||
return coeff(row, col);
|
||||
}
|
||||
|
||||
/** Short version: don't use this function, use
|
||||
* \link operator[](Index) const \endlink instead.
|
||||
*
|
||||
* Long version: this function is similar to
|
||||
* \link operator[](Index) const \endlink, but without the assertion.
|
||||
* Use this for limiting the performance cost of debugging code when doing
|
||||
* repeated coefficient access. Only use this when it is guaranteed that the
|
||||
* parameter \a index is in range.
|
||||
*
|
||||
* If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this
|
||||
* function equivalent to \link operator[](Index) const \endlink.
|
||||
*
|
||||
* \sa operator[](Index) const, coeffRef(Index), coeff(Index,Index) const
|
||||
*/
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType
|
||||
coeff(Index index) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit,
|
||||
THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)
|
||||
eigen_internal_assert(index >= 0 && index < size());
|
||||
return internal::evaluator<Derived>(derived()).coeff(index);
|
||||
}
|
||||
|
||||
|
||||
/** \returns the coefficient at given index.
|
||||
*
|
||||
* This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit.
|
||||
*
|
||||
* \sa operator[](Index), operator()(Index,Index) const, x() const, y() const,
|
||||
* z() const, w() const
|
||||
*/
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType
|
||||
operator[](Index index) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
|
||||
THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
|
||||
eigen_assert(index >= 0 && index < size());
|
||||
return coeff(index);
|
||||
}
|
||||
|
||||
/** \returns the coefficient at given index.
|
||||
*
|
||||
* This is synonymous to operator[](Index) const.
|
||||
*
|
||||
* This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit.
|
||||
*
|
||||
* \sa operator[](Index), operator()(Index,Index) const, x() const, y() const,
|
||||
* z() const, w() const
|
||||
*/
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType
|
||||
operator()(Index index) const
|
||||
{
|
||||
eigen_assert(index >= 0 && index < size());
|
||||
return coeff(index);
|
||||
}
|
||||
|
||||
/** equivalent to operator[](0). */
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType
|
||||
x() const { return (*this)[0]; }
|
||||
|
||||
/** equivalent to operator[](1). */
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType
|
||||
y() const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS);
|
||||
return (*this)[1];
|
||||
}
|
||||
|
||||
/** equivalent to operator[](2). */
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType
|
||||
z() const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS);
|
||||
return (*this)[2];
|
||||
}
|
||||
|
||||
/** equivalent to operator[](3). */
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE CoeffReturnType
|
||||
w() const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS);
|
||||
return (*this)[3];
|
||||
}
|
||||
|
||||
/** \internal
|
||||
* \returns the packet of coefficients starting at the given row and column. It is your responsibility
|
||||
* to ensure that a packet really starts there. This method is only available on expressions having the
|
||||
* PacketAccessBit.
|
||||
*
|
||||
* The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select
|
||||
* the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
|
||||
* starting at an address which is a multiple of the packet size.
|
||||
*/
|
||||
|
||||
template<int LoadMode>
|
||||
EIGEN_STRONG_INLINE PacketReturnType packet(Index row, Index col) const
|
||||
{
|
||||
typedef typename internal::packet_traits<Scalar>::type DefaultPacketType;
|
||||
eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
|
||||
return internal::evaluator<Derived>(derived()).template packet<LoadMode,DefaultPacketType>(row,col);
|
||||
}
|
||||
|
||||
|
||||
/** \internal */
|
||||
template<int LoadMode>
|
||||
EIGEN_STRONG_INLINE PacketReturnType packetByOuterInner(Index outer, Index inner) const
|
||||
{
|
||||
return packet<LoadMode>(rowIndexByOuterInner(outer, inner),
|
||||
colIndexByOuterInner(outer, inner));
|
||||
}
|
||||
|
||||
/** \internal
|
||||
* \returns the packet of coefficients starting at the given index. It is your responsibility
|
||||
* to ensure that a packet really starts there. This method is only available on expressions having the
|
||||
* PacketAccessBit and the LinearAccessBit.
|
||||
*
|
||||
* The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select
|
||||
* the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
|
||||
* starting at an address which is a multiple of the packet size.
|
||||
*/
|
||||
|
||||
template<int LoadMode>
|
||||
EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit,
|
||||
THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)
|
||||
typedef typename internal::packet_traits<Scalar>::type DefaultPacketType;
|
||||
eigen_internal_assert(index >= 0 && index < size());
|
||||
return internal::evaluator<Derived>(derived()).template packet<LoadMode,DefaultPacketType>(index);
|
||||
}
|
||||
|
||||
protected:
|
||||
// explanation: DenseBase is doing "using ..." on the methods from DenseCoeffsBase.
|
||||
// But some methods are only available in the DirectAccess case.
|
||||
// So we add dummy methods here with these names, so that "using... " doesn't fail.
|
||||
// It's not private so that the child class DenseBase can access them, and it's not public
|
||||
// either since it's an implementation detail, so has to be protected.
|
||||
void coeffRef();
|
||||
void coeffRefByOuterInner();
|
||||
void writePacket();
|
||||
void writePacketByOuterInner();
|
||||
void copyCoeff();
|
||||
void copyCoeffByOuterInner();
|
||||
void copyPacket();
|
||||
void copyPacketByOuterInner();
|
||||
void stride();
|
||||
void innerStride();
|
||||
void outerStride();
|
||||
void rowStride();
|
||||
void colStride();
|
||||
};
|
||||
|
||||
/** \brief Base class providing read/write coefficient access to matrices and arrays.
|
||||
* \ingroup Core_Module
|
||||
* \tparam Derived Type of the derived class
|
||||
* \tparam #WriteAccessors Constant indicating read/write access
|
||||
*
|
||||
* This class defines the non-const \c operator() function and friends, which can be used to write specific
|
||||
* entries of a matrix or array. This class inherits DenseCoeffsBase<Derived, ReadOnlyAccessors> which
|
||||
* defines the const variant for reading specific entries.
|
||||
*
|
||||
* \sa DenseCoeffsBase<Derived, DirectAccessors>, \ref TopicClassHierarchy
|
||||
*/
|
||||
template<typename Derived>
|
||||
class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived, ReadOnlyAccessors>
|
||||
{
|
||||
public:
|
||||
|
||||
typedef DenseCoeffsBase<Derived, ReadOnlyAccessors> Base;
|
||||
|
||||
typedef typename internal::traits<Derived>::StorageKind StorageKind;
|
||||
typedef typename internal::traits<Derived>::Scalar Scalar;
|
||||
typedef typename internal::packet_traits<Scalar>::type PacketScalar;
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
|
||||
using Base::coeff;
|
||||
using Base::rows;
|
||||
using Base::cols;
|
||||
using Base::size;
|
||||
using Base::derived;
|
||||
using Base::rowIndexByOuterInner;
|
||||
using Base::colIndexByOuterInner;
|
||||
using Base::operator[];
|
||||
using Base::operator();
|
||||
using Base::x;
|
||||
using Base::y;
|
||||
using Base::z;
|
||||
using Base::w;
|
||||
|
||||
/** Short version: don't use this function, use
|
||||
* \link operator()(Index,Index) \endlink instead.
|
||||
*
|
||||
* Long version: this function is similar to
|
||||
* \link operator()(Index,Index) \endlink, but without the assertion.
|
||||
* Use this for limiting the performance cost of debugging code when doing
|
||||
* repeated coefficient access. Only use this when it is guaranteed that the
|
||||
* parameters \a row and \a col are in range.
|
||||
*
|
||||
* If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this
|
||||
* function equivalent to \link operator()(Index,Index) \endlink.
|
||||
*
|
||||
* \sa operator()(Index,Index), coeff(Index, Index) const, coeffRef(Index)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col)
|
||||
{
|
||||
eigen_internal_assert(row >= 0 && row < rows()
|
||||
&& col >= 0 && col < cols());
|
||||
return internal::evaluator<Derived>(derived()).coeffRef(row,col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar&
|
||||
coeffRefByOuterInner(Index outer, Index inner)
|
||||
{
|
||||
return coeffRef(rowIndexByOuterInner(outer, inner),
|
||||
colIndexByOuterInner(outer, inner));
|
||||
}
|
||||
|
||||
/** \returns a reference to the coefficient at given the given row and column.
|
||||
*
|
||||
* \sa operator[](Index)
|
||||
*/
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar&
|
||||
operator()(Index row, Index col)
|
||||
{
|
||||
eigen_assert(row >= 0 && row < rows()
|
||||
&& col >= 0 && col < cols());
|
||||
return coeffRef(row, col);
|
||||
}
|
||||
|
||||
|
||||
/** Short version: don't use this function, use
|
||||
* \link operator[](Index) \endlink instead.
|
||||
*
|
||||
* Long version: this function is similar to
|
||||
* \link operator[](Index) \endlink, but without the assertion.
|
||||
* Use this for limiting the performance cost of debugging code when doing
|
||||
* repeated coefficient access. Only use this when it is guaranteed that the
|
||||
* parameters \a row and \a col are in range.
|
||||
*
|
||||
* If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this
|
||||
* function equivalent to \link operator[](Index) \endlink.
|
||||
*
|
||||
* \sa operator[](Index), coeff(Index) const, coeffRef(Index,Index)
|
||||
*/
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar&
|
||||
coeffRef(Index index)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit,
|
||||
THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)
|
||||
eigen_internal_assert(index >= 0 && index < size());
|
||||
return internal::evaluator<Derived>(derived()).coeffRef(index);
|
||||
}
|
||||
|
||||
/** \returns a reference to the coefficient at given index.
|
||||
*
|
||||
* This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit.
|
||||
*
|
||||
* \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w()
|
||||
*/
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar&
|
||||
operator[](Index index)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
|
||||
THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
|
||||
eigen_assert(index >= 0 && index < size());
|
||||
return coeffRef(index);
|
||||
}
|
||||
|
||||
/** \returns a reference to the coefficient at given index.
|
||||
*
|
||||
* This is synonymous to operator[](Index).
|
||||
*
|
||||
* This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit.
|
||||
*
|
||||
* \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w()
|
||||
*/
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar&
|
||||
operator()(Index index)
|
||||
{
|
||||
eigen_assert(index >= 0 && index < size());
|
||||
return coeffRef(index);
|
||||
}
|
||||
|
||||
/** equivalent to operator[](0). */
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar&
|
||||
x() { return (*this)[0]; }
|
||||
|
||||
/** equivalent to operator[](1). */
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar&
|
||||
y()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS);
|
||||
return (*this)[1];
|
||||
}
|
||||
|
||||
/** equivalent to operator[](2). */
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar&
|
||||
z()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS);
|
||||
return (*this)[2];
|
||||
}
|
||||
|
||||
/** equivalent to operator[](3). */
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar&
|
||||
w()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS);
|
||||
return (*this)[3];
|
||||
}
|
||||
};
|
||||
|
||||
/** \brief Base class providing direct read-only coefficient access to matrices and arrays.
|
||||
* \ingroup Core_Module
|
||||
* \tparam Derived Type of the derived class
|
||||
* \tparam #DirectAccessors Constant indicating direct access
|
||||
*
|
||||
* This class defines functions to work with strides which can be used to access entries directly. This class
|
||||
* inherits DenseCoeffsBase<Derived, ReadOnlyAccessors> which defines functions to access entries read-only using
|
||||
* \c operator() .
|
||||
*
|
||||
* \sa \blank \ref TopicClassHierarchy
|
||||
*/
|
||||
template<typename Derived>
|
||||
class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived, ReadOnlyAccessors>
|
||||
{
|
||||
public:
|
||||
|
||||
typedef DenseCoeffsBase<Derived, ReadOnlyAccessors> Base;
|
||||
typedef typename internal::traits<Derived>::Scalar Scalar;
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
|
||||
using Base::rows;
|
||||
using Base::cols;
|
||||
using Base::size;
|
||||
using Base::derived;
|
||||
|
||||
/** \returns the pointer increment between two consecutive elements within a slice in the inner direction.
|
||||
*
|
||||
* \sa outerStride(), rowStride(), colStride()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const
|
||||
{
|
||||
return derived().innerStride();
|
||||
}
|
||||
|
||||
/** \returns the pointer increment between two consecutive inner slices (for example, between two consecutive columns
|
||||
* in a column-major matrix).
|
||||
*
|
||||
* \sa innerStride(), rowStride(), colStride()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const
|
||||
{
|
||||
return derived().outerStride();
|
||||
}
|
||||
|
||||
// FIXME shall we remove it ?
|
||||
inline Index stride() const
|
||||
{
|
||||
return Derived::IsVectorAtCompileTime ? innerStride() : outerStride();
|
||||
}
|
||||
|
||||
/** \returns the pointer increment between two consecutive rows.
|
||||
*
|
||||
* \sa innerStride(), outerStride(), colStride()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rowStride() const
|
||||
{
|
||||
return Derived::IsRowMajor ? outerStride() : innerStride();
|
||||
}
|
||||
|
||||
/** \returns the pointer increment between two consecutive columns.
|
||||
*
|
||||
* \sa innerStride(), outerStride(), rowStride()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index colStride() const
|
||||
{
|
||||
return Derived::IsRowMajor ? innerStride() : outerStride();
|
||||
}
|
||||
};
|
||||
|
||||
/** \brief Base class providing direct read/write coefficient access to matrices and arrays.
|
||||
* \ingroup Core_Module
|
||||
* \tparam Derived Type of the derived class
|
||||
* \tparam #DirectWriteAccessors Constant indicating direct access
|
||||
*
|
||||
* This class defines functions to work with strides which can be used to access entries directly. This class
|
||||
* inherits DenseCoeffsBase<Derived, WriteAccessors> which defines functions to access entries read/write using
|
||||
* \c operator().
|
||||
*
|
||||
* \sa \blank \ref TopicClassHierarchy
|
||||
*/
|
||||
template<typename Derived>
|
||||
class DenseCoeffsBase<Derived, DirectWriteAccessors>
|
||||
: public DenseCoeffsBase<Derived, WriteAccessors>
|
||||
{
|
||||
public:
|
||||
|
||||
typedef DenseCoeffsBase<Derived, WriteAccessors> Base;
|
||||
typedef typename internal::traits<Derived>::Scalar Scalar;
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
|
||||
using Base::rows;
|
||||
using Base::cols;
|
||||
using Base::size;
|
||||
using Base::derived;
|
||||
|
||||
/** \returns the pointer increment between two consecutive elements within a slice in the inner direction.
|
||||
*
|
||||
* \sa outerStride(), rowStride(), colStride()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const
|
||||
{
|
||||
return derived().innerStride();
|
||||
}
|
||||
|
||||
/** \returns the pointer increment between two consecutive inner slices (for example, between two consecutive columns
|
||||
* in a column-major matrix).
|
||||
*
|
||||
* \sa innerStride(), rowStride(), colStride()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const
|
||||
{
|
||||
return derived().outerStride();
|
||||
}
|
||||
|
||||
// FIXME shall we remove it ?
|
||||
inline Index stride() const
|
||||
{
|
||||
return Derived::IsVectorAtCompileTime ? innerStride() : outerStride();
|
||||
}
|
||||
|
||||
/** \returns the pointer increment between two consecutive rows.
|
||||
*
|
||||
* \sa innerStride(), outerStride(), colStride()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rowStride() const
|
||||
{
|
||||
return Derived::IsRowMajor ? outerStride() : innerStride();
|
||||
}
|
||||
|
||||
/** \returns the pointer increment between two consecutive columns.
|
||||
*
|
||||
* \sa innerStride(), outerStride(), rowStride()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index colStride() const
|
||||
{
|
||||
return Derived::IsRowMajor ? innerStride() : outerStride();
|
||||
}
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<int Alignment, typename Derived, bool JustReturnZero>
|
||||
struct first_aligned_impl
|
||||
{
|
||||
static inline Index run(const Derived&)
|
||||
{ return 0; }
|
||||
};
|
||||
|
||||
template<int Alignment, typename Derived>
|
||||
struct first_aligned_impl<Alignment, Derived, false>
|
||||
{
|
||||
static inline Index run(const Derived& m)
|
||||
{
|
||||
return internal::first_aligned<Alignment>(m.data(), m.size());
|
||||
}
|
||||
};
|
||||
|
||||
/** \internal \returns the index of the first element of the array stored by \a m that is properly aligned with respect to \a Alignment for vectorization.
|
||||
*
|
||||
* \tparam Alignment requested alignment in Bytes.
|
||||
*
|
||||
* There is also the variant first_aligned(const Scalar*, Integer) defined in Memory.h. See it for more
|
||||
* documentation.
|
||||
*/
|
||||
template<int Alignment, typename Derived>
|
||||
static inline Index first_aligned(const DenseBase<Derived>& m)
|
||||
{
|
||||
enum { ReturnZero = (int(evaluator<Derived>::Alignment) >= Alignment) || !(Derived::Flags & DirectAccessBit) };
|
||||
return first_aligned_impl<Alignment, Derived, ReturnZero>::run(m.derived());
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
static inline Index first_default_aligned(const DenseBase<Derived>& m)
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
typedef typename packet_traits<Scalar>::type DefaultPacketType;
|
||||
return internal::first_aligned<int(unpacket_traits<DefaultPacketType>::alignment),Derived>(m);
|
||||
}
|
||||
|
||||
template<typename Derived, bool HasDirectAccess = has_direct_access<Derived>::ret>
|
||||
struct inner_stride_at_compile_time
|
||||
{
|
||||
enum { ret = traits<Derived>::InnerStrideAtCompileTime };
|
||||
};
|
||||
|
||||
template<typename Derived>
|
||||
struct inner_stride_at_compile_time<Derived, false>
|
||||
{
|
||||
enum { ret = 0 };
|
||||
};
|
||||
|
||||
template<typename Derived, bool HasDirectAccess = has_direct_access<Derived>::ret>
|
||||
struct outer_stride_at_compile_time
|
||||
{
|
||||
enum { ret = traits<Derived>::OuterStrideAtCompileTime };
|
||||
};
|
||||
|
||||
template<typename Derived>
|
||||
struct outer_stride_at_compile_time<Derived, false>
|
||||
{
|
||||
enum { ret = 0 };
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_DENSECOEFFSBASE_H
|
||||
570
external/include/eigen3/Eigen/src/Core/DenseStorage.h
vendored
Normal file
570
external/include/eigen3/Eigen/src/Core/DenseStorage.h
vendored
Normal file
@@ -0,0 +1,570 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2010-2013 Hauke Heibel <hauke.heibel@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_MATRIXSTORAGE_H
|
||||
#define EIGEN_MATRIXSTORAGE_H
|
||||
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(X) X; EIGEN_DENSE_STORAGE_CTOR_PLUGIN;
|
||||
#else
|
||||
#define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(X)
|
||||
#endif
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
struct constructor_without_unaligned_array_assert {};
|
||||
|
||||
template<typename T, int Size>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void check_static_allocation_size()
|
||||
{
|
||||
// if EIGEN_STACK_ALLOCATION_LIMIT is defined to 0, then no limit
|
||||
#if EIGEN_STACK_ALLOCATION_LIMIT
|
||||
EIGEN_STATIC_ASSERT(Size * sizeof(T) <= EIGEN_STACK_ALLOCATION_LIMIT, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG);
|
||||
#endif
|
||||
}
|
||||
|
||||
/** \internal
|
||||
* Static array. If the MatrixOrArrayOptions require auto-alignment, the array will be automatically aligned:
|
||||
* to 16 bytes boundary if the total size is a multiple of 16 bytes.
|
||||
*/
|
||||
template <typename T, int Size, int MatrixOrArrayOptions,
|
||||
int Alignment = (MatrixOrArrayOptions&DontAlign) ? 0
|
||||
: compute_default_alignment<T,Size>::value >
|
||||
struct plain_array
|
||||
{
|
||||
T array[Size];
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
plain_array()
|
||||
{
|
||||
check_static_allocation_size<T,Size>();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
plain_array(constructor_without_unaligned_array_assert)
|
||||
{
|
||||
check_static_allocation_size<T,Size>();
|
||||
}
|
||||
};
|
||||
|
||||
#if defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
|
||||
#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask)
|
||||
#elif EIGEN_GNUC_AT_LEAST(4,7)
|
||||
// GCC 4.7 is too aggressive in its optimizations and remove the alignement test based on the fact the array is declared to be aligned.
|
||||
// See this bug report: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=53900
|
||||
// Hiding the origin of the array pointer behind a function argument seems to do the trick even if the function is inlined:
|
||||
template<typename PtrType>
|
||||
EIGEN_ALWAYS_INLINE PtrType eigen_unaligned_array_assert_workaround_gcc47(PtrType array) { return array; }
|
||||
#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
|
||||
eigen_assert((internal::UIntPtr(eigen_unaligned_array_assert_workaround_gcc47(array)) & (sizemask)) == 0 \
|
||||
&& "this assertion is explained here: " \
|
||||
"http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \
|
||||
" **** READ THIS WEB PAGE !!! ****");
|
||||
#else
|
||||
#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
|
||||
eigen_assert((internal::UIntPtr(array) & (sizemask)) == 0 \
|
||||
&& "this assertion is explained here: " \
|
||||
"http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \
|
||||
" **** READ THIS WEB PAGE !!! ****");
|
||||
#endif
|
||||
|
||||
template <typename T, int Size, int MatrixOrArrayOptions>
|
||||
struct plain_array<T, Size, MatrixOrArrayOptions, 8>
|
||||
{
|
||||
EIGEN_ALIGN_TO_BOUNDARY(8) T array[Size];
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
plain_array()
|
||||
{
|
||||
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(7);
|
||||
check_static_allocation_size<T,Size>();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
plain_array(constructor_without_unaligned_array_assert)
|
||||
{
|
||||
check_static_allocation_size<T,Size>();
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, int Size, int MatrixOrArrayOptions>
|
||||
struct plain_array<T, Size, MatrixOrArrayOptions, 16>
|
||||
{
|
||||
EIGEN_ALIGN_TO_BOUNDARY(16) T array[Size];
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
plain_array()
|
||||
{
|
||||
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(15);
|
||||
check_static_allocation_size<T,Size>();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
plain_array(constructor_without_unaligned_array_assert)
|
||||
{
|
||||
check_static_allocation_size<T,Size>();
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, int Size, int MatrixOrArrayOptions>
|
||||
struct plain_array<T, Size, MatrixOrArrayOptions, 32>
|
||||
{
|
||||
EIGEN_ALIGN_TO_BOUNDARY(32) T array[Size];
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
plain_array()
|
||||
{
|
||||
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31);
|
||||
check_static_allocation_size<T,Size>();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
plain_array(constructor_without_unaligned_array_assert)
|
||||
{
|
||||
check_static_allocation_size<T,Size>();
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, int Size, int MatrixOrArrayOptions>
|
||||
struct plain_array<T, Size, MatrixOrArrayOptions, 64>
|
||||
{
|
||||
EIGEN_ALIGN_TO_BOUNDARY(64) T array[Size];
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
plain_array()
|
||||
{
|
||||
EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(63);
|
||||
check_static_allocation_size<T,Size>();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
plain_array(constructor_without_unaligned_array_assert)
|
||||
{
|
||||
check_static_allocation_size<T,Size>();
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, int MatrixOrArrayOptions, int Alignment>
|
||||
struct plain_array<T, 0, MatrixOrArrayOptions, Alignment>
|
||||
{
|
||||
T array[1];
|
||||
EIGEN_DEVICE_FUNC plain_array() {}
|
||||
EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \internal
|
||||
*
|
||||
* \class DenseStorage
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Stores the data of a matrix
|
||||
*
|
||||
* This class stores the data of fixed-size, dynamic-size or mixed matrices
|
||||
* in a way as compact as possible.
|
||||
*
|
||||
* \sa Matrix
|
||||
*/
|
||||
template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseStorage;
|
||||
|
||||
// purely fixed-size matrix
|
||||
template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseStorage
|
||||
{
|
||||
internal::plain_array<T,Size,_Options> m_data;
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC DenseStorage() {
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size)
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||
: m_data(internal::constructor_without_unaligned_array_assert()) {}
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseStorage(const DenseStorage& other) : m_data(other.m_data) {
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size)
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseStorage& operator=(const DenseStorage& other)
|
||||
{
|
||||
if (this != &other) m_data = other.m_data;
|
||||
return *this;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) {
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
|
||||
eigen_internal_assert(size==rows*cols && rows==_Rows && cols==_Cols);
|
||||
EIGEN_UNUSED_VARIABLE(size);
|
||||
EIGEN_UNUSED_VARIABLE(rows);
|
||||
EIGEN_UNUSED_VARIABLE(cols);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); }
|
||||
EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
|
||||
EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
|
||||
EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {}
|
||||
EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {}
|
||||
EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
|
||||
EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
|
||||
};
|
||||
|
||||
// null matrix
|
||||
template<typename T, int _Rows, int _Cols, int _Options> class DenseStorage<T, 0, _Rows, _Cols, _Options>
|
||||
{
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC DenseStorage() {}
|
||||
EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) {}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage&) {}
|
||||
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage&) { return *this; }
|
||||
EIGEN_DEVICE_FUNC DenseStorage(Index,Index,Index) {}
|
||||
EIGEN_DEVICE_FUNC void swap(DenseStorage& ) {}
|
||||
EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
|
||||
EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
|
||||
EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {}
|
||||
EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {}
|
||||
EIGEN_DEVICE_FUNC const T *data() const { return 0; }
|
||||
EIGEN_DEVICE_FUNC T *data() { return 0; }
|
||||
};
|
||||
|
||||
// more specializations for null matrices; these are necessary to resolve ambiguities
|
||||
template<typename T, int _Options> class DenseStorage<T, 0, Dynamic, Dynamic, _Options>
|
||||
: public DenseStorage<T, 0, 0, 0, _Options> { };
|
||||
|
||||
template<typename T, int _Rows, int _Options> class DenseStorage<T, 0, _Rows, Dynamic, _Options>
|
||||
: public DenseStorage<T, 0, 0, 0, _Options> { };
|
||||
|
||||
template<typename T, int _Cols, int _Options> class DenseStorage<T, 0, Dynamic, _Cols, _Options>
|
||||
: public DenseStorage<T, 0, 0, 0, _Options> { };
|
||||
|
||||
// dynamic-size matrix with fixed-size storage
|
||||
template<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic, Dynamic, _Options>
|
||||
{
|
||||
internal::plain_array<T,Size,_Options> m_data;
|
||||
Index m_rows;
|
||||
Index m_cols;
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0), m_cols(0) {}
|
||||
EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||
: m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) {}
|
||||
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
|
||||
{
|
||||
if (this != &other)
|
||||
{
|
||||
m_data = other.m_data;
|
||||
m_rows = other.m_rows;
|
||||
m_cols = other.m_cols;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index cols) : m_rows(rows), m_cols(cols) {}
|
||||
EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
|
||||
{ std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
|
||||
EIGEN_DEVICE_FUNC Index rows() const {return m_rows;}
|
||||
EIGEN_DEVICE_FUNC Index cols() const {return m_cols;}
|
||||
EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; }
|
||||
EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; }
|
||||
EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
|
||||
EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
|
||||
};
|
||||
|
||||
// dynamic-size matrix with fixed-size storage and fixed width
|
||||
template<typename T, int Size, int _Cols, int _Options> class DenseStorage<T, Size, Dynamic, _Cols, _Options>
|
||||
{
|
||||
internal::plain_array<T,Size,_Options> m_data;
|
||||
Index m_rows;
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {}
|
||||
EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||
: m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows) {}
|
||||
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
|
||||
{
|
||||
if (this != &other)
|
||||
{
|
||||
m_data = other.m_data;
|
||||
m_rows = other.m_rows;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index) : m_rows(rows) {}
|
||||
EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
|
||||
EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
|
||||
EIGEN_DEVICE_FUNC Index cols(void) const {return _Cols;}
|
||||
EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index) { m_rows = rows; }
|
||||
EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index) { m_rows = rows; }
|
||||
EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
|
||||
EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
|
||||
};
|
||||
|
||||
// dynamic-size matrix with fixed-size storage and fixed height
|
||||
template<typename T, int Size, int _Rows, int _Options> class DenseStorage<T, Size, _Rows, Dynamic, _Options>
|
||||
{
|
||||
internal::plain_array<T,Size,_Options> m_data;
|
||||
Index m_cols;
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {}
|
||||
EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||
: m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_cols(other.m_cols) {}
|
||||
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
|
||||
{
|
||||
if (this != &other)
|
||||
{
|
||||
m_data = other.m_data;
|
||||
m_cols = other.m_cols;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(Index, Index, Index cols) : m_cols(cols) {}
|
||||
EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
|
||||
EIGEN_DEVICE_FUNC Index rows(void) const {return _Rows;}
|
||||
EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
|
||||
void conservativeResize(Index, Index, Index cols) { m_cols = cols; }
|
||||
void resize(Index, Index, Index cols) { m_cols = cols; }
|
||||
EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
|
||||
EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
|
||||
};
|
||||
|
||||
// purely dynamic matrix.
|
||||
template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynamic, _Options>
|
||||
{
|
||||
T *m_data;
|
||||
Index m_rows;
|
||||
Index m_cols;
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0), m_cols(0) {}
|
||||
EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
|
||||
: m_data(0), m_rows(0), m_cols(0) {}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols)
|
||||
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows), m_cols(cols)
|
||||
{
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
|
||||
eigen_internal_assert(size==rows*cols && rows>=0 && cols >=0);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
|
||||
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*other.m_cols))
|
||||
, m_rows(other.m_rows)
|
||||
, m_cols(other.m_cols)
|
||||
{
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows*m_cols)
|
||||
internal::smart_copy(other.m_data, other.m_data+other.m_rows*other.m_cols, m_data);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
|
||||
{
|
||||
if (this != &other)
|
||||
{
|
||||
DenseStorage tmp(other);
|
||||
this->swap(tmp);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
#if EIGEN_HAS_RVALUE_REFERENCES
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
|
||||
: m_data(std::move(other.m_data))
|
||||
, m_rows(std::move(other.m_rows))
|
||||
, m_cols(std::move(other.m_cols))
|
||||
{
|
||||
other.m_data = nullptr;
|
||||
other.m_rows = 0;
|
||||
other.m_cols = 0;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
|
||||
{
|
||||
using std::swap;
|
||||
swap(m_data, other.m_data);
|
||||
swap(m_rows, other.m_rows);
|
||||
swap(m_cols, other.m_cols);
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols); }
|
||||
EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
|
||||
{ std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
|
||||
EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
|
||||
EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
|
||||
void conservativeResize(Index size, Index rows, Index cols)
|
||||
{
|
||||
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*m_cols);
|
||||
m_rows = rows;
|
||||
m_cols = cols;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC void resize(Index size, Index rows, Index cols)
|
||||
{
|
||||
if(size != m_rows*m_cols)
|
||||
{
|
||||
internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols);
|
||||
if (size)
|
||||
m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
|
||||
else
|
||||
m_data = 0;
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
|
||||
}
|
||||
m_rows = rows;
|
||||
m_cols = cols;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
|
||||
EIGEN_DEVICE_FUNC T *data() { return m_data; }
|
||||
};
|
||||
|
||||
// matrix with dynamic width and fixed height (so that matrix has dynamic size).
|
||||
template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Rows, Dynamic, _Options>
|
||||
{
|
||||
T *m_data;
|
||||
Index m_cols;
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_cols(0) {}
|
||||
explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(cols)
|
||||
{
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
|
||||
eigen_internal_assert(size==rows*cols && rows==_Rows && cols >=0);
|
||||
EIGEN_UNUSED_VARIABLE(rows);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
|
||||
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(_Rows*other.m_cols))
|
||||
, m_cols(other.m_cols)
|
||||
{
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_cols*_Rows)
|
||||
internal::smart_copy(other.m_data, other.m_data+_Rows*m_cols, m_data);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
|
||||
{
|
||||
if (this != &other)
|
||||
{
|
||||
DenseStorage tmp(other);
|
||||
this->swap(tmp);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
#if EIGEN_HAS_RVALUE_REFERENCES
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
|
||||
: m_data(std::move(other.m_data))
|
||||
, m_cols(std::move(other.m_cols))
|
||||
{
|
||||
other.m_data = nullptr;
|
||||
other.m_cols = 0;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
|
||||
{
|
||||
using std::swap;
|
||||
swap(m_data, other.m_data);
|
||||
swap(m_cols, other.m_cols);
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
|
||||
EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
|
||||
EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
|
||||
EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
|
||||
EIGEN_DEVICE_FUNC void conservativeResize(Index size, Index, Index cols)
|
||||
{
|
||||
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, _Rows*m_cols);
|
||||
m_cols = cols;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index, Index cols)
|
||||
{
|
||||
if(size != _Rows*m_cols)
|
||||
{
|
||||
internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols);
|
||||
if (size)
|
||||
m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
|
||||
else
|
||||
m_data = 0;
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
|
||||
}
|
||||
m_cols = cols;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
|
||||
EIGEN_DEVICE_FUNC T *data() { return m_data; }
|
||||
};
|
||||
|
||||
// matrix with dynamic height and fixed width (so that matrix has dynamic size).
|
||||
template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dynamic, _Cols, _Options>
|
||||
{
|
||||
T *m_data;
|
||||
Index m_rows;
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0) {}
|
||||
explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows)
|
||||
{
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
|
||||
eigen_internal_assert(size==rows*cols && rows>=0 && cols == _Cols);
|
||||
EIGEN_UNUSED_VARIABLE(cols);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
|
||||
: m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*_Cols))
|
||||
, m_rows(other.m_rows)
|
||||
{
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows*_Cols)
|
||||
internal::smart_copy(other.m_data, other.m_data+other.m_rows*_Cols, m_data);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
|
||||
{
|
||||
if (this != &other)
|
||||
{
|
||||
DenseStorage tmp(other);
|
||||
this->swap(tmp);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
#if EIGEN_HAS_RVALUE_REFERENCES
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
|
||||
: m_data(std::move(other.m_data))
|
||||
, m_rows(std::move(other.m_rows))
|
||||
{
|
||||
other.m_data = nullptr;
|
||||
other.m_rows = 0;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
|
||||
{
|
||||
using std::swap;
|
||||
swap(m_data, other.m_data);
|
||||
swap(m_rows, other.m_rows);
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
|
||||
EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
|
||||
EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
|
||||
EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
|
||||
void conservativeResize(Index size, Index rows, Index)
|
||||
{
|
||||
m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*_Cols);
|
||||
m_rows = rows;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index rows, Index)
|
||||
{
|
||||
if(size != m_rows*_Cols)
|
||||
{
|
||||
internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows);
|
||||
if (size)
|
||||
m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
|
||||
else
|
||||
m_data = 0;
|
||||
EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
|
||||
}
|
||||
m_rows = rows;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
|
||||
EIGEN_DEVICE_FUNC T *data() { return m_data; }
|
||||
};
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_MATRIX_H
|
||||
260
external/include/eigen3/Eigen/src/Core/Diagonal.h
vendored
Normal file
260
external/include/eigen3/Eigen/src/Core/Diagonal.h
vendored
Normal file
@@ -0,0 +1,260 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_DIAGONAL_H
|
||||
#define EIGEN_DIAGONAL_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class Diagonal
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of a diagonal/subdiagonal/superdiagonal in a matrix
|
||||
*
|
||||
* \param MatrixType the type of the object in which we are taking a sub/main/super diagonal
|
||||
* \param DiagIndex the index of the sub/super diagonal. The default is 0 and it means the main diagonal.
|
||||
* A positive value means a superdiagonal, a negative value means a subdiagonal.
|
||||
* You can also use DynamicIndex so the index can be set at runtime.
|
||||
*
|
||||
* The matrix is not required to be square.
|
||||
*
|
||||
* This class represents an expression of the main diagonal, or any sub/super diagonal
|
||||
* of a square matrix. It is the return type of MatrixBase::diagonal() and MatrixBase::diagonal(Index) and most of the
|
||||
* time this is the only way it is used.
|
||||
*
|
||||
* \sa MatrixBase::diagonal(), MatrixBase::diagonal(Index)
|
||||
*/
|
||||
|
||||
namespace internal {
|
||||
template<typename MatrixType, int DiagIndex>
|
||||
struct traits<Diagonal<MatrixType,DiagIndex> >
|
||||
: traits<MatrixType>
|
||||
{
|
||||
typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
|
||||
typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
|
||||
typedef typename MatrixType::StorageKind StorageKind;
|
||||
enum {
|
||||
RowsAtCompileTime = (int(DiagIndex) == DynamicIndex || int(MatrixType::SizeAtCompileTime) == Dynamic) ? Dynamic
|
||||
: (EIGEN_PLAIN_ENUM_MIN(MatrixType::RowsAtCompileTime - EIGEN_PLAIN_ENUM_MAX(-DiagIndex, 0),
|
||||
MatrixType::ColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))),
|
||||
ColsAtCompileTime = 1,
|
||||
MaxRowsAtCompileTime = int(MatrixType::MaxSizeAtCompileTime) == Dynamic ? Dynamic
|
||||
: DiagIndex == DynamicIndex ? EIGEN_SIZE_MIN_PREFER_FIXED(MatrixType::MaxRowsAtCompileTime,
|
||||
MatrixType::MaxColsAtCompileTime)
|
||||
: (EIGEN_PLAIN_ENUM_MIN(MatrixType::MaxRowsAtCompileTime - EIGEN_PLAIN_ENUM_MAX(-DiagIndex, 0),
|
||||
MatrixType::MaxColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))),
|
||||
MaxColsAtCompileTime = 1,
|
||||
MaskLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
|
||||
Flags = (unsigned int)_MatrixTypeNested::Flags & (RowMajorBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, // FIXME DirectAccessBit should not be handled by expressions
|
||||
MatrixTypeOuterStride = outer_stride_at_compile_time<MatrixType>::ret,
|
||||
InnerStrideAtCompileTime = MatrixTypeOuterStride == Dynamic ? Dynamic : MatrixTypeOuterStride+1,
|
||||
OuterStrideAtCompileTime = 0
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
template<typename MatrixType, int _DiagIndex> class Diagonal
|
||||
: public internal::dense_xpr_base< Diagonal<MatrixType,_DiagIndex> >::type
|
||||
{
|
||||
public:
|
||||
|
||||
enum { DiagIndex = _DiagIndex };
|
||||
typedef typename internal::dense_xpr_base<Diagonal>::type Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal)
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index)
|
||||
{
|
||||
eigen_assert( a_index <= m_matrix.cols() && -a_index <= m_matrix.rows() );
|
||||
}
|
||||
|
||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal)
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rows() const
|
||||
{
|
||||
return m_index.value()<0 ? numext::mini<Index>(m_matrix.cols(),m_matrix.rows()+m_index.value())
|
||||
: numext::mini<Index>(m_matrix.rows(),m_matrix.cols()-m_index.value());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index cols() const { return 1; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const
|
||||
{
|
||||
return m_matrix.outerStride() + 1;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
typedef typename internal::conditional<
|
||||
internal::is_lvalue<MatrixType>::value,
|
||||
Scalar,
|
||||
const Scalar
|
||||
>::type ScalarWithConstIfNotLvalue;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.coeffRef(rowOffset(), colOffset())); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar* data() const { return &(m_matrix.coeffRef(rowOffset(), colOffset())); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index row, Index)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
|
||||
return m_matrix.coeffRef(row+rowOffset(), row+colOffset());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index row, Index) const
|
||||
{
|
||||
return m_matrix.coeffRef(row+rowOffset(), row+colOffset());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CoeffReturnType coeff(Index row, Index) const
|
||||
{
|
||||
return m_matrix.coeff(row+rowOffset(), row+colOffset());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index idx)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
|
||||
return m_matrix.coeffRef(idx+rowOffset(), idx+colOffset());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index idx) const
|
||||
{
|
||||
return m_matrix.coeffRef(idx+rowOffset(), idx+colOffset());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline CoeffReturnType coeff(Index idx) const
|
||||
{
|
||||
return m_matrix.coeff(idx+rowOffset(), idx+colOffset());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const typename internal::remove_all<typename MatrixType::Nested>::type&
|
||||
nestedExpression() const
|
||||
{
|
||||
return m_matrix;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index index() const
|
||||
{
|
||||
return m_index.value();
|
||||
}
|
||||
|
||||
protected:
|
||||
typename internal::ref_selector<MatrixType>::non_const_type m_matrix;
|
||||
const internal::variable_if_dynamicindex<Index, DiagIndex> m_index;
|
||||
|
||||
private:
|
||||
// some compilers may fail to optimize std::max etc in case of compile-time constants...
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index absDiagIndex() const { return m_index.value()>0 ? m_index.value() : -m_index.value(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value()>0 ? 0 : -m_index.value(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value()>0 ? m_index.value() : 0; }
|
||||
// trigger a compile-time error if someone try to call packet
|
||||
template<int LoadMode> typename MatrixType::PacketReturnType packet(Index) const;
|
||||
template<int LoadMode> typename MatrixType::PacketReturnType packet(Index,Index) const;
|
||||
};
|
||||
|
||||
/** \returns an expression of the main diagonal of the matrix \c *this
|
||||
*
|
||||
* \c *this is not required to be square.
|
||||
*
|
||||
* Example: \include MatrixBase_diagonal.cpp
|
||||
* Output: \verbinclude MatrixBase_diagonal.out
|
||||
*
|
||||
* \sa class Diagonal */
|
||||
template<typename Derived>
|
||||
inline typename MatrixBase<Derived>::DiagonalReturnType
|
||||
MatrixBase<Derived>::diagonal()
|
||||
{
|
||||
return DiagonalReturnType(derived());
|
||||
}
|
||||
|
||||
/** This is the const version of diagonal(). */
|
||||
template<typename Derived>
|
||||
inline typename MatrixBase<Derived>::ConstDiagonalReturnType
|
||||
MatrixBase<Derived>::diagonal() const
|
||||
{
|
||||
return ConstDiagonalReturnType(derived());
|
||||
}
|
||||
|
||||
/** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this
|
||||
*
|
||||
* \c *this is not required to be square.
|
||||
*
|
||||
* The template parameter \a DiagIndex represent a super diagonal if \a DiagIndex > 0
|
||||
* and a sub diagonal otherwise. \a DiagIndex == 0 is equivalent to the main diagonal.
|
||||
*
|
||||
* Example: \include MatrixBase_diagonal_int.cpp
|
||||
* Output: \verbinclude MatrixBase_diagonal_int.out
|
||||
*
|
||||
* \sa MatrixBase::diagonal(), class Diagonal */
|
||||
template<typename Derived>
|
||||
inline typename MatrixBase<Derived>::DiagonalDynamicIndexReturnType
|
||||
MatrixBase<Derived>::diagonal(Index index)
|
||||
{
|
||||
return DiagonalDynamicIndexReturnType(derived(), index);
|
||||
}
|
||||
|
||||
/** This is the const version of diagonal(Index). */
|
||||
template<typename Derived>
|
||||
inline typename MatrixBase<Derived>::ConstDiagonalDynamicIndexReturnType
|
||||
MatrixBase<Derived>::diagonal(Index index) const
|
||||
{
|
||||
return ConstDiagonalDynamicIndexReturnType(derived(), index);
|
||||
}
|
||||
|
||||
/** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this
|
||||
*
|
||||
* \c *this is not required to be square.
|
||||
*
|
||||
* The template parameter \a DiagIndex represent a super diagonal if \a DiagIndex > 0
|
||||
* and a sub diagonal otherwise. \a DiagIndex == 0 is equivalent to the main diagonal.
|
||||
*
|
||||
* Example: \include MatrixBase_diagonal_template_int.cpp
|
||||
* Output: \verbinclude MatrixBase_diagonal_template_int.out
|
||||
*
|
||||
* \sa MatrixBase::diagonal(), class Diagonal */
|
||||
template<typename Derived>
|
||||
template<int Index_>
|
||||
inline typename MatrixBase<Derived>::template DiagonalIndexReturnType<Index_>::Type
|
||||
MatrixBase<Derived>::diagonal()
|
||||
{
|
||||
return typename DiagonalIndexReturnType<Index_>::Type(derived());
|
||||
}
|
||||
|
||||
/** This is the const version of diagonal<int>(). */
|
||||
template<typename Derived>
|
||||
template<int Index_>
|
||||
inline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<Index_>::Type
|
||||
MatrixBase<Derived>::diagonal() const
|
||||
{
|
||||
return typename ConstDiagonalIndexReturnType<Index_>::Type(derived());
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_DIAGONAL_H
|
||||
343
external/include/eigen3/Eigen/src/Core/DiagonalMatrix.h
vendored
Normal file
343
external/include/eigen3/Eigen/src/Core/DiagonalMatrix.h
vendored
Normal file
@@ -0,0 +1,343 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_DIAGONALMATRIX_H
|
||||
#define EIGEN_DIAGONALMATRIX_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename Derived>
|
||||
class DiagonalBase : public EigenBase<Derived>
|
||||
{
|
||||
public:
|
||||
typedef typename internal::traits<Derived>::DiagonalVectorType DiagonalVectorType;
|
||||
typedef typename DiagonalVectorType::Scalar Scalar;
|
||||
typedef typename DiagonalVectorType::RealScalar RealScalar;
|
||||
typedef typename internal::traits<Derived>::StorageKind StorageKind;
|
||||
typedef typename internal::traits<Derived>::StorageIndex StorageIndex;
|
||||
|
||||
enum {
|
||||
RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
|
||||
ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
|
||||
MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
|
||||
MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
|
||||
IsVectorAtCompileTime = 0,
|
||||
Flags = NoPreferredStorageOrderBit
|
||||
};
|
||||
|
||||
typedef Matrix<Scalar, RowsAtCompileTime, ColsAtCompileTime, 0, MaxRowsAtCompileTime, MaxColsAtCompileTime> DenseMatrixType;
|
||||
typedef DenseMatrixType DenseType;
|
||||
typedef DiagonalMatrix<Scalar,DiagonalVectorType::SizeAtCompileTime,DiagonalVectorType::MaxSizeAtCompileTime> PlainObject;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Derived& derived() { return *static_cast<Derived*>(this); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseMatrixType toDenseMatrix() const { return derived(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline DiagonalVectorType& diagonal() { return derived().diagonal(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rows() const { return diagonal().size(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index cols() const { return diagonal().size(); }
|
||||
|
||||
template<typename MatrixDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Product<Derived,MatrixDerived,LazyProduct>
|
||||
operator*(const MatrixBase<MatrixDerived> &matrix) const
|
||||
{
|
||||
return Product<Derived, MatrixDerived, LazyProduct>(derived(),matrix.derived());
|
||||
}
|
||||
|
||||
typedef DiagonalWrapper<const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType> > InverseReturnType;
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const InverseReturnType
|
||||
inverse() const
|
||||
{
|
||||
return InverseReturnType(diagonal().cwiseInverse());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const DiagonalWrapper<const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DiagonalVectorType,Scalar,product) >
|
||||
operator*(const Scalar& scalar) const
|
||||
{
|
||||
return DiagonalWrapper<const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DiagonalVectorType,Scalar,product) >(diagonal() * scalar);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
friend inline const DiagonalWrapper<const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,DiagonalVectorType,product) >
|
||||
operator*(const Scalar& scalar, const DiagonalBase& other)
|
||||
{
|
||||
return DiagonalWrapper<const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,DiagonalVectorType,product) >(scalar * other.diagonal());
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
/** \class DiagonalMatrix
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Represents a diagonal matrix with its storage
|
||||
*
|
||||
* \param _Scalar the type of coefficients
|
||||
* \param SizeAtCompileTime the dimension of the matrix, or Dynamic
|
||||
* \param MaxSizeAtCompileTime the dimension of the matrix, or Dynamic. This parameter is optional and defaults
|
||||
* to SizeAtCompileTime. Most of the time, you do not need to specify it.
|
||||
*
|
||||
* \sa class DiagonalWrapper
|
||||
*/
|
||||
|
||||
namespace internal {
|
||||
template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime>
|
||||
struct traits<DiagonalMatrix<_Scalar,SizeAtCompileTime,MaxSizeAtCompileTime> >
|
||||
: traits<Matrix<_Scalar,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
|
||||
{
|
||||
typedef Matrix<_Scalar,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1> DiagonalVectorType;
|
||||
typedef DiagonalShape StorageKind;
|
||||
enum {
|
||||
Flags = LvalueBit | NoPreferredStorageOrderBit
|
||||
};
|
||||
};
|
||||
}
|
||||
template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime>
|
||||
class DiagonalMatrix
|
||||
: public DiagonalBase<DiagonalMatrix<_Scalar,SizeAtCompileTime,MaxSizeAtCompileTime> >
|
||||
{
|
||||
public:
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
typedef typename internal::traits<DiagonalMatrix>::DiagonalVectorType DiagonalVectorType;
|
||||
typedef const DiagonalMatrix& Nested;
|
||||
typedef _Scalar Scalar;
|
||||
typedef typename internal::traits<DiagonalMatrix>::StorageKind StorageKind;
|
||||
typedef typename internal::traits<DiagonalMatrix>::StorageIndex StorageIndex;
|
||||
#endif
|
||||
|
||||
protected:
|
||||
|
||||
DiagonalVectorType m_diagonal;
|
||||
|
||||
public:
|
||||
|
||||
/** const version of diagonal(). */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const DiagonalVectorType& diagonal() const { return m_diagonal; }
|
||||
/** \returns a reference to the stored vector of diagonal coefficients. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline DiagonalVectorType& diagonal() { return m_diagonal; }
|
||||
|
||||
/** Default constructor without initialization */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline DiagonalMatrix() {}
|
||||
|
||||
/** Constructs a diagonal matrix with given dimension */
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit inline DiagonalMatrix(Index dim) : m_diagonal(dim) {}
|
||||
|
||||
/** 2D constructor. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline DiagonalMatrix(const Scalar& x, const Scalar& y) : m_diagonal(x,y) {}
|
||||
|
||||
/** 3D constructor. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline DiagonalMatrix(const Scalar& x, const Scalar& y, const Scalar& z) : m_diagonal(x,y,z) {}
|
||||
|
||||
/** Copy constructor. */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline DiagonalMatrix(const DiagonalBase<OtherDerived>& other) : m_diagonal(other.diagonal()) {}
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** copy constructor. prevent a default copy constructor from hiding the other templated constructor */
|
||||
inline DiagonalMatrix(const DiagonalMatrix& other) : m_diagonal(other.diagonal()) {}
|
||||
#endif
|
||||
|
||||
/** generic constructor from expression of the diagonal coefficients */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit inline DiagonalMatrix(const MatrixBase<OtherDerived>& other) : m_diagonal(other)
|
||||
{}
|
||||
|
||||
/** Copy operator. */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
DiagonalMatrix& operator=(const DiagonalBase<OtherDerived>& other)
|
||||
{
|
||||
m_diagonal = other.diagonal();
|
||||
return *this;
|
||||
}
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** This is a special case of the templated operator=. Its purpose is to
|
||||
* prevent a default operator= from hiding the templated operator=.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
DiagonalMatrix& operator=(const DiagonalMatrix& other)
|
||||
{
|
||||
m_diagonal = other.diagonal();
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
|
||||
/** Resizes to given size. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void resize(Index size) { m_diagonal.resize(size); }
|
||||
/** Sets all coefficients to zero. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void setZero() { m_diagonal.setZero(); }
|
||||
/** Resizes and sets all coefficients to zero. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void setZero(Index size) { m_diagonal.setZero(size); }
|
||||
/** Sets this matrix to be the identity matrix of the current size. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void setIdentity() { m_diagonal.setOnes(); }
|
||||
/** Sets this matrix to be the identity matrix of the given size. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void setIdentity(Index size) { m_diagonal.setOnes(size); }
|
||||
};
|
||||
|
||||
/** \class DiagonalWrapper
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of a diagonal matrix
|
||||
*
|
||||
* \param _DiagonalVectorType the type of the vector of diagonal coefficients
|
||||
*
|
||||
* This class is an expression of a diagonal matrix, but not storing its own vector of diagonal coefficients,
|
||||
* instead wrapping an existing vector expression. It is the return type of MatrixBase::asDiagonal()
|
||||
* and most of the time this is the only way that it is used.
|
||||
*
|
||||
* \sa class DiagonalMatrix, class DiagonalBase, MatrixBase::asDiagonal()
|
||||
*/
|
||||
|
||||
namespace internal {
|
||||
template<typename _DiagonalVectorType>
|
||||
struct traits<DiagonalWrapper<_DiagonalVectorType> >
|
||||
{
|
||||
typedef _DiagonalVectorType DiagonalVectorType;
|
||||
typedef typename DiagonalVectorType::Scalar Scalar;
|
||||
typedef typename DiagonalVectorType::StorageIndex StorageIndex;
|
||||
typedef DiagonalShape StorageKind;
|
||||
typedef typename traits<DiagonalVectorType>::XprKind XprKind;
|
||||
enum {
|
||||
RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
|
||||
ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
|
||||
MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
|
||||
MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
|
||||
Flags = (traits<DiagonalVectorType>::Flags & LvalueBit) | NoPreferredStorageOrderBit
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
template<typename _DiagonalVectorType>
|
||||
class DiagonalWrapper
|
||||
: public DiagonalBase<DiagonalWrapper<_DiagonalVectorType> >, internal::no_assignment_operator
|
||||
{
|
||||
public:
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
typedef _DiagonalVectorType DiagonalVectorType;
|
||||
typedef DiagonalWrapper Nested;
|
||||
#endif
|
||||
|
||||
/** Constructor from expression of diagonal coefficients to wrap. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {}
|
||||
|
||||
/** \returns a const reference to the wrapped expression of diagonal coefficients. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const DiagonalVectorType& diagonal() const { return m_diagonal; }
|
||||
|
||||
protected:
|
||||
typename DiagonalVectorType::Nested m_diagonal;
|
||||
};
|
||||
|
||||
/** \returns a pseudo-expression of a diagonal matrix with *this as vector of diagonal coefficients
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* Example: \include MatrixBase_asDiagonal.cpp
|
||||
* Output: \verbinclude MatrixBase_asDiagonal.out
|
||||
*
|
||||
* \sa class DiagonalWrapper, class DiagonalMatrix, diagonal(), isDiagonal()
|
||||
**/
|
||||
template<typename Derived>
|
||||
inline const DiagonalWrapper<const Derived>
|
||||
MatrixBase<Derived>::asDiagonal() const
|
||||
{
|
||||
return DiagonalWrapper<const Derived>(derived());
|
||||
}
|
||||
|
||||
/** \returns true if *this is approximately equal to a diagonal matrix,
|
||||
* within the precision given by \a prec.
|
||||
*
|
||||
* Example: \include MatrixBase_isDiagonal.cpp
|
||||
* Output: \verbinclude MatrixBase_isDiagonal.out
|
||||
*
|
||||
* \sa asDiagonal()
|
||||
*/
|
||||
template<typename Derived>
|
||||
bool MatrixBase<Derived>::isDiagonal(const RealScalar& prec) const
|
||||
{
|
||||
if(cols() != rows()) return false;
|
||||
RealScalar maxAbsOnDiagonal = static_cast<RealScalar>(-1);
|
||||
for(Index j = 0; j < cols(); ++j)
|
||||
{
|
||||
RealScalar absOnDiagonal = numext::abs(coeff(j,j));
|
||||
if(absOnDiagonal > maxAbsOnDiagonal) maxAbsOnDiagonal = absOnDiagonal;
|
||||
}
|
||||
for(Index j = 0; j < cols(); ++j)
|
||||
for(Index i = 0; i < j; ++i)
|
||||
{
|
||||
if(!internal::isMuchSmallerThan(coeff(i, j), maxAbsOnDiagonal, prec)) return false;
|
||||
if(!internal::isMuchSmallerThan(coeff(j, i), maxAbsOnDiagonal, prec)) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<> struct storage_kind_to_shape<DiagonalShape> { typedef DiagonalShape Shape; };
|
||||
|
||||
struct Diagonal2Dense {};
|
||||
|
||||
template<> struct AssignmentKind<DenseShape,DiagonalShape> { typedef Diagonal2Dense Kind; };
|
||||
|
||||
// Diagonal matrix to Dense assignment
|
||||
template< typename DstXprType, typename SrcXprType, typename Functor>
|
||||
struct Assignment<DstXprType, SrcXprType, Functor, Diagonal2Dense>
|
||||
{
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
dst.setZero();
|
||||
dst.diagonal() = src.diagonal();
|
||||
}
|
||||
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
|
||||
{ dst.diagonal() += src.diagonal(); }
|
||||
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
|
||||
{ dst.diagonal() -= src.diagonal(); }
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_DIAGONALMATRIX_H
|
||||
28
external/include/eigen3/Eigen/src/Core/DiagonalProduct.h
vendored
Normal file
28
external/include/eigen3/Eigen/src/Core/DiagonalProduct.h
vendored
Normal file
@@ -0,0 +1,28 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_DIAGONALPRODUCT_H
|
||||
#define EIGEN_DIAGONALPRODUCT_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \returns the diagonal matrix product of \c *this by the diagonal matrix \a diagonal.
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename DiagonalDerived>
|
||||
inline const Product<Derived, DiagonalDerived, LazyProduct>
|
||||
MatrixBase<Derived>::operator*(const DiagonalBase<DiagonalDerived> &a_diagonal) const
|
||||
{
|
||||
return Product<Derived, DiagonalDerived, LazyProduct>(derived(),a_diagonal.derived());
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_DIAGONALPRODUCT_H
|
||||
318
external/include/eigen3/Eigen/src/Core/Dot.h
vendored
Normal file
318
external/include/eigen3/Eigen/src/Core/Dot.h
vendored
Normal file
@@ -0,0 +1,318 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2006-2008, 2010 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_DOT_H
|
||||
#define EIGEN_DOT_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
// helper function for dot(). The problem is that if we put that in the body of dot(), then upon calling dot
|
||||
// with mismatched types, the compiler emits errors about failing to instantiate cwiseProduct BEFORE
|
||||
// looking at the static assertions. Thus this is a trick to get better compile errors.
|
||||
template<typename T, typename U,
|
||||
// the NeedToTranspose condition here is taken straight from Assign.h
|
||||
bool NeedToTranspose = T::IsVectorAtCompileTime
|
||||
&& U::IsVectorAtCompileTime
|
||||
&& ((int(T::RowsAtCompileTime) == 1 && int(U::ColsAtCompileTime) == 1)
|
||||
| // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&".
|
||||
// revert to || as soon as not needed anymore.
|
||||
(int(T::ColsAtCompileTime) == 1 && int(U::RowsAtCompileTime) == 1))
|
||||
>
|
||||
struct dot_nocheck
|
||||
{
|
||||
typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
|
||||
typedef typename conj_prod::result_type ResScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE
|
||||
static ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
|
||||
{
|
||||
return a.template binaryExpr<conj_prod>(b).sum();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, typename U>
|
||||
struct dot_nocheck<T, U, true>
|
||||
{
|
||||
typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
|
||||
typedef typename conj_prod::result_type ResScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE
|
||||
static ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
|
||||
{
|
||||
return a.transpose().template binaryExpr<conj_prod>(b).sum();
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \fn MatrixBase::dot
|
||||
* \returns the dot product of *this with other.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* \note If the scalar type is complex numbers, then this function returns the hermitian
|
||||
* (sesquilinear) dot product, conjugate-linear in the first variable and linear in the
|
||||
* second variable.
|
||||
*
|
||||
* \sa squaredNorm(), norm()
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE
|
||||
typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
|
||||
MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
|
||||
#if !(defined(EIGEN_NO_STATIC_ASSERT) && defined(EIGEN_NO_DEBUG))
|
||||
typedef internal::scalar_conj_product_op<Scalar,typename OtherDerived::Scalar> func;
|
||||
EIGEN_CHECK_BINARY_COMPATIBILIY(func,Scalar,typename OtherDerived::Scalar);
|
||||
#endif
|
||||
|
||||
eigen_assert(size() == other.size());
|
||||
|
||||
return internal::dot_nocheck<Derived,OtherDerived>::run(*this, other);
|
||||
}
|
||||
|
||||
//---------- implementation of L2 norm and related functions ----------
|
||||
|
||||
/** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the Frobenius norm.
|
||||
* In both cases, it consists in the sum of the square of all the matrix entries.
|
||||
* For vectors, this is also equals to the dot product of \c *this with itself.
|
||||
*
|
||||
* \sa dot(), norm(), lpNorm()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const
|
||||
{
|
||||
return numext::real((*this).cwiseAbs2().sum());
|
||||
}
|
||||
|
||||
/** \returns, for vectors, the \em l2 norm of \c *this, and for matrices the Frobenius norm.
|
||||
* In both cases, it consists in the square root of the sum of the square of all the matrix entries.
|
||||
* For vectors, this is also equals to the square root of the dot product of \c *this with itself.
|
||||
*
|
||||
* \sa lpNorm(), dot(), squaredNorm()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
|
||||
{
|
||||
return numext::sqrt(squaredNorm());
|
||||
}
|
||||
|
||||
/** \returns an expression of the quotient of \c *this by its own norm.
|
||||
*
|
||||
* \warning If the input vector is too small (i.e., this->norm()==0),
|
||||
* then this function returns a copy of the input.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* \sa norm(), normalize()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject
|
||||
MatrixBase<Derived>::normalized() const
|
||||
{
|
||||
typedef typename internal::nested_eval<Derived,2>::type _Nested;
|
||||
_Nested n(derived());
|
||||
RealScalar z = n.squaredNorm();
|
||||
// NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU
|
||||
if(z>RealScalar(0))
|
||||
return n / numext::sqrt(z);
|
||||
else
|
||||
return n;
|
||||
}
|
||||
|
||||
/** Normalizes the vector, i.e. divides it by its own norm.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged.
|
||||
*
|
||||
* \sa norm(), normalized()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE void MatrixBase<Derived>::normalize()
|
||||
{
|
||||
RealScalar z = squaredNorm();
|
||||
// NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU
|
||||
if(z>RealScalar(0))
|
||||
derived() /= numext::sqrt(z);
|
||||
}
|
||||
|
||||
/** \returns an expression of the quotient of \c *this by its own norm while avoiding underflow and overflow.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* This method is analogue to the normalized() method, but it reduces the risk of
|
||||
* underflow and overflow when computing the norm.
|
||||
*
|
||||
* \warning If the input vector is too small (i.e., this->norm()==0),
|
||||
* then this function returns a copy of the input.
|
||||
*
|
||||
* \sa stableNorm(), stableNormalize(), normalized()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject
|
||||
MatrixBase<Derived>::stableNormalized() const
|
||||
{
|
||||
typedef typename internal::nested_eval<Derived,3>::type _Nested;
|
||||
_Nested n(derived());
|
||||
RealScalar w = n.cwiseAbs().maxCoeff();
|
||||
RealScalar z = (n/w).squaredNorm();
|
||||
if(z>RealScalar(0))
|
||||
return n / (numext::sqrt(z)*w);
|
||||
else
|
||||
return n;
|
||||
}
|
||||
|
||||
/** Normalizes the vector while avoid underflow and overflow
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* This method is analogue to the normalize() method, but it reduces the risk of
|
||||
* underflow and overflow when computing the norm.
|
||||
*
|
||||
* \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged.
|
||||
*
|
||||
* \sa stableNorm(), stableNormalized(), normalize()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE void MatrixBase<Derived>::stableNormalize()
|
||||
{
|
||||
RealScalar w = cwiseAbs().maxCoeff();
|
||||
RealScalar z = (derived()/w).squaredNorm();
|
||||
if(z>RealScalar(0))
|
||||
derived() /= numext::sqrt(z)*w;
|
||||
}
|
||||
|
||||
//---------- implementation of other norms ----------
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Derived, int p>
|
||||
struct lpNorm_selector
|
||||
{
|
||||
typedef typename NumTraits<typename traits<Derived>::Scalar>::Real RealScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar run(const MatrixBase<Derived>& m)
|
||||
{
|
||||
EIGEN_USING_STD_MATH(pow)
|
||||
return pow(m.cwiseAbs().array().pow(p).sum(), RealScalar(1)/p);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived>
|
||||
struct lpNorm_selector<Derived, 1>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
|
||||
{
|
||||
return m.cwiseAbs().sum();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived>
|
||||
struct lpNorm_selector<Derived, 2>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
|
||||
{
|
||||
return m.norm();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived>
|
||||
struct lpNorm_selector<Derived, Infinity>
|
||||
{
|
||||
typedef typename NumTraits<typename traits<Derived>::Scalar>::Real RealScalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar run(const MatrixBase<Derived>& m)
|
||||
{
|
||||
if(Derived::SizeAtCompileTime==0 || (Derived::SizeAtCompileTime==Dynamic && m.size()==0))
|
||||
return RealScalar(0);
|
||||
return m.cwiseAbs().maxCoeff();
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \returns the \b coefficient-wise \f$ \ell^p \f$ norm of \c *this, that is, returns the p-th root of the sum of the p-th powers of the absolute values
|
||||
* of the coefficients of \c *this. If \a p is the special value \a Eigen::Infinity, this function returns the \f$ \ell^\infty \f$
|
||||
* norm, that is the maximum of the absolute values of the coefficients of \c *this.
|
||||
*
|
||||
* In all cases, if \c *this is empty, then the value 0 is returned.
|
||||
*
|
||||
* \note For matrices, this function does not compute the <a href="https://en.wikipedia.org/wiki/Operator_norm">operator-norm</a>. That is, if \c *this is a matrix, then its coefficients are interpreted as a 1D vector. Nonetheless, you can easily compute the 1-norm and \f$\infty\f$-norm matrix operator norms using \link TutorialReductionsVisitorsBroadcastingReductionsNorm partial reductions \endlink.
|
||||
*
|
||||
* \sa norm()
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<int p>
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
|
||||
#else
|
||||
MatrixBase<Derived>::RealScalar
|
||||
#endif
|
||||
MatrixBase<Derived>::lpNorm() const
|
||||
{
|
||||
return internal::lpNorm_selector<Derived, p>::run(*this);
|
||||
}
|
||||
|
||||
//---------- implementation of isOrthogonal / isUnitary ----------
|
||||
|
||||
/** \returns true if *this is approximately orthogonal to \a other,
|
||||
* within the precision given by \a prec.
|
||||
*
|
||||
* Example: \include MatrixBase_isOrthogonal.cpp
|
||||
* Output: \verbinclude MatrixBase_isOrthogonal.out
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
bool MatrixBase<Derived>::isOrthogonal
|
||||
(const MatrixBase<OtherDerived>& other, const RealScalar& prec) const
|
||||
{
|
||||
typename internal::nested_eval<Derived,2>::type nested(derived());
|
||||
typename internal::nested_eval<OtherDerived,2>::type otherNested(other.derived());
|
||||
return numext::abs2(nested.dot(otherNested)) <= prec * prec * nested.squaredNorm() * otherNested.squaredNorm();
|
||||
}
|
||||
|
||||
/** \returns true if *this is approximately an unitary matrix,
|
||||
* within the precision given by \a prec. In the case where the \a Scalar
|
||||
* type is real numbers, a unitary matrix is an orthogonal matrix, whence the name.
|
||||
*
|
||||
* \note This can be used to check whether a family of vectors forms an orthonormal basis.
|
||||
* Indeed, \c m.isUnitary() returns true if and only if the columns (equivalently, the rows) of m form an
|
||||
* orthonormal basis.
|
||||
*
|
||||
* Example: \include MatrixBase_isUnitary.cpp
|
||||
* Output: \verbinclude MatrixBase_isUnitary.out
|
||||
*/
|
||||
template<typename Derived>
|
||||
bool MatrixBase<Derived>::isUnitary(const RealScalar& prec) const
|
||||
{
|
||||
typename internal::nested_eval<Derived,1>::type self(derived());
|
||||
for(Index i = 0; i < cols(); ++i)
|
||||
{
|
||||
if(!internal::isApprox(self.col(i).squaredNorm(), static_cast<RealScalar>(1), prec))
|
||||
return false;
|
||||
for(Index j = 0; j < i; ++j)
|
||||
if(!internal::isMuchSmallerThan(self.col(i).dot(self.col(j)), static_cast<Scalar>(1), prec))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_DOT_H
|
||||
159
external/include/eigen3/Eigen/src/Core/EigenBase.h
vendored
Normal file
159
external/include/eigen3/Eigen/src/Core/EigenBase.h
vendored
Normal file
@@ -0,0 +1,159 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_EIGENBASE_H
|
||||
#define EIGEN_EIGENBASE_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class EigenBase
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T).
|
||||
*
|
||||
* In other words, an EigenBase object is an object that can be copied into a MatrixBase.
|
||||
*
|
||||
* Besides MatrixBase-derived classes, this also includes special matrix classes such as diagonal matrices, etc.
|
||||
*
|
||||
* Notice that this class is trivial, it is only used to disambiguate overloaded functions.
|
||||
*
|
||||
* \sa \blank \ref TopicClassHierarchy
|
||||
*/
|
||||
template<typename Derived> struct EigenBase
|
||||
{
|
||||
// typedef typename internal::plain_matrix_type<Derived>::type PlainObject;
|
||||
|
||||
/** \brief The interface type of indices
|
||||
* \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE.
|
||||
* \deprecated Since Eigen 3.3, its usage is deprecated. Use Eigen::Index instead.
|
||||
* \sa StorageIndex, \ref TopicPreprocessorDirectives.
|
||||
*/
|
||||
typedef Eigen::Index Index;
|
||||
|
||||
// FIXME is it needed?
|
||||
typedef typename internal::traits<Derived>::StorageKind StorageKind;
|
||||
|
||||
/** \returns a reference to the derived object */
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& derived() { return *static_cast<Derived*>(this); }
|
||||
/** \returns a const reference to the derived object */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Derived& derived() const { return *static_cast<const Derived*>(this); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Derived& const_cast_derived() const
|
||||
{ return *static_cast<Derived*>(const_cast<EigenBase*>(this)); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Derived& const_derived() const
|
||||
{ return *static_cast<const Derived*>(this); }
|
||||
|
||||
/** \returns the number of rows. \sa cols(), RowsAtCompileTime */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rows() const { return derived().rows(); }
|
||||
/** \returns the number of columns. \sa rows(), ColsAtCompileTime*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index cols() const { return derived().cols(); }
|
||||
/** \returns the number of coefficients, which is rows()*cols().
|
||||
* \sa rows(), cols(), SizeAtCompileTime. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index size() const { return rows() * cols(); }
|
||||
|
||||
/** \internal Don't use it, but do the equivalent: \code dst = *this; \endcode */
|
||||
template<typename Dest>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void evalTo(Dest& dst) const
|
||||
{ derived().evalTo(dst); }
|
||||
|
||||
/** \internal Don't use it, but do the equivalent: \code dst += *this; \endcode */
|
||||
template<typename Dest>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void addTo(Dest& dst) const
|
||||
{
|
||||
// This is the default implementation,
|
||||
// derived class can reimplement it in a more optimized way.
|
||||
typename Dest::PlainObject res(rows(),cols());
|
||||
evalTo(res);
|
||||
dst += res;
|
||||
}
|
||||
|
||||
/** \internal Don't use it, but do the equivalent: \code dst -= *this; \endcode */
|
||||
template<typename Dest>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void subTo(Dest& dst) const
|
||||
{
|
||||
// This is the default implementation,
|
||||
// derived class can reimplement it in a more optimized way.
|
||||
typename Dest::PlainObject res(rows(),cols());
|
||||
evalTo(res);
|
||||
dst -= res;
|
||||
}
|
||||
|
||||
/** \internal Don't use it, but do the equivalent: \code dst.applyOnTheRight(*this); \endcode */
|
||||
template<typename Dest>
|
||||
EIGEN_DEVICE_FUNC inline void applyThisOnTheRight(Dest& dst) const
|
||||
{
|
||||
// This is the default implementation,
|
||||
// derived class can reimplement it in a more optimized way.
|
||||
dst = dst * this->derived();
|
||||
}
|
||||
|
||||
/** \internal Don't use it, but do the equivalent: \code dst.applyOnTheLeft(*this); \endcode */
|
||||
template<typename Dest>
|
||||
EIGEN_DEVICE_FUNC inline void applyThisOnTheLeft(Dest& dst) const
|
||||
{
|
||||
// This is the default implementation,
|
||||
// derived class can reimplement it in a more optimized way.
|
||||
dst = this->derived() * dst;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
* Implementation of matrix base methods
|
||||
***************************************************************************/
|
||||
|
||||
/** \brief Copies the generic expression \a other into *this.
|
||||
*
|
||||
* \details The expression must provide a (templated) evalTo(Derived& dst) const
|
||||
* function which does the actual job. In practice, this allows any user to write
|
||||
* its own special matrix without having to modify MatrixBase
|
||||
*
|
||||
* \returns a reference to *this.
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
call_assignment(derived(), other.derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
return derived();
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
return derived();
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_EIGENBASE_H
|
||||
146
external/include/eigen3/Eigen/src/Core/ForceAlignedAccess.h
vendored
Normal file
146
external/include/eigen3/Eigen/src/Core/ForceAlignedAccess.h
vendored
Normal file
@@ -0,0 +1,146 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_FORCEALIGNEDACCESS_H
|
||||
#define EIGEN_FORCEALIGNEDACCESS_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class ForceAlignedAccess
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Enforce aligned packet loads and stores regardless of what is requested
|
||||
*
|
||||
* \param ExpressionType the type of the object of which we are forcing aligned packet access
|
||||
*
|
||||
* This class is the return type of MatrixBase::forceAlignedAccess()
|
||||
* and most of the time this is the only way it is used.
|
||||
*
|
||||
* \sa MatrixBase::forceAlignedAccess()
|
||||
*/
|
||||
|
||||
namespace internal {
|
||||
template<typename ExpressionType>
|
||||
struct traits<ForceAlignedAccess<ExpressionType> > : public traits<ExpressionType>
|
||||
{};
|
||||
}
|
||||
|
||||
template<typename ExpressionType> class ForceAlignedAccess
|
||||
: public internal::dense_xpr_base< ForceAlignedAccess<ExpressionType> >::type
|
||||
{
|
||||
public:
|
||||
|
||||
typedef typename internal::dense_xpr_base<ForceAlignedAccess>::type Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(ForceAlignedAccess)
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); }
|
||||
EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); }
|
||||
EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); }
|
||||
EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
return m_expression.coeff(row, col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col)
|
||||
{
|
||||
return m_expression.const_cast_derived().coeffRef(row, col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return m_expression.coeff(index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index)
|
||||
{
|
||||
return m_expression.const_cast_derived().coeffRef(index);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline const PacketScalar packet(Index row, Index col) const
|
||||
{
|
||||
return m_expression.template packet<Aligned>(row, col);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline void writePacket(Index row, Index col, const PacketScalar& x)
|
||||
{
|
||||
m_expression.const_cast_derived().template writePacket<Aligned>(row, col, x);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline const PacketScalar packet(Index index) const
|
||||
{
|
||||
return m_expression.template packet<Aligned>(index);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline void writePacket(Index index, const PacketScalar& x)
|
||||
{
|
||||
m_expression.const_cast_derived().template writePacket<Aligned>(index, x);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; }
|
||||
|
||||
protected:
|
||||
const ExpressionType& m_expression;
|
||||
|
||||
private:
|
||||
ForceAlignedAccess& operator=(const ForceAlignedAccess&);
|
||||
};
|
||||
|
||||
/** \returns an expression of *this with forced aligned access
|
||||
* \sa forceAlignedAccessIf(),class ForceAlignedAccess
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const ForceAlignedAccess<Derived>
|
||||
MatrixBase<Derived>::forceAlignedAccess() const
|
||||
{
|
||||
return ForceAlignedAccess<Derived>(derived());
|
||||
}
|
||||
|
||||
/** \returns an expression of *this with forced aligned access
|
||||
* \sa forceAlignedAccessIf(), class ForceAlignedAccess
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline ForceAlignedAccess<Derived>
|
||||
MatrixBase<Derived>::forceAlignedAccess()
|
||||
{
|
||||
return ForceAlignedAccess<Derived>(derived());
|
||||
}
|
||||
|
||||
/** \returns an expression of *this with forced aligned access if \a Enable is true.
|
||||
* \sa forceAlignedAccess(), class ForceAlignedAccess
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<bool Enable>
|
||||
inline typename internal::add_const_on_value_type<typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type>::type
|
||||
MatrixBase<Derived>::forceAlignedAccessIf() const
|
||||
{
|
||||
return derived(); // FIXME This should not work but apparently is never used
|
||||
}
|
||||
|
||||
/** \returns an expression of *this with forced aligned access if \a Enable is true.
|
||||
* \sa forceAlignedAccess(), class ForceAlignedAccess
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<bool Enable>
|
||||
inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type
|
||||
MatrixBase<Derived>::forceAlignedAccessIf()
|
||||
{
|
||||
return derived(); // FIXME This should not work but apparently is never used
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_FORCEALIGNEDACCESS_H
|
||||
155
external/include/eigen3/Eigen/src/Core/Fuzzy.h
vendored
Normal file
155
external/include/eigen3/Eigen/src/Core/Fuzzy.h
vendored
Normal file
@@ -0,0 +1,155 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_FUZZY_H
|
||||
#define EIGEN_FUZZY_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal
|
||||
{
|
||||
|
||||
template<typename Derived, typename OtherDerived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
|
||||
struct isApprox_selector
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec)
|
||||
{
|
||||
typename internal::nested_eval<Derived,2>::type nested(x);
|
||||
typename internal::nested_eval<OtherDerived,2>::type otherNested(y);
|
||||
return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * numext::mini(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum());
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived, typename OtherDerived>
|
||||
struct isApprox_selector<Derived, OtherDerived, true>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar&)
|
||||
{
|
||||
return x.matrix() == y.matrix();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived, typename OtherDerived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
|
||||
struct isMuchSmallerThan_object_selector
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec)
|
||||
{
|
||||
return x.cwiseAbs2().sum() <= numext::abs2(prec) * y.cwiseAbs2().sum();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived, typename OtherDerived>
|
||||
struct isMuchSmallerThan_object_selector<Derived, OtherDerived, true>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static bool run(const Derived& x, const OtherDerived&, const typename Derived::RealScalar&)
|
||||
{
|
||||
return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
|
||||
struct isMuchSmallerThan_scalar_selector
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static bool run(const Derived& x, const typename Derived::RealScalar& y, const typename Derived::RealScalar& prec)
|
||||
{
|
||||
return x.cwiseAbs2().sum() <= numext::abs2(prec * y);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived>
|
||||
struct isMuchSmallerThan_scalar_selector<Derived, true>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static bool run(const Derived& x, const typename Derived::RealScalar&, const typename Derived::RealScalar&)
|
||||
{
|
||||
return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix();
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
|
||||
/** \returns \c true if \c *this is approximately equal to \a other, within the precision
|
||||
* determined by \a prec.
|
||||
*
|
||||
* \note The fuzzy compares are done multiplicatively. Two vectors \f$ v \f$ and \f$ w \f$
|
||||
* are considered to be approximately equal within precision \f$ p \f$ if
|
||||
* \f[ \Vert v - w \Vert \leqslant p\,\min(\Vert v\Vert, \Vert w\Vert). \f]
|
||||
* For matrices, the comparison is done using the Hilbert-Schmidt norm (aka Frobenius norm
|
||||
* L2 norm).
|
||||
*
|
||||
* \note Because of the multiplicativeness of this comparison, one can't use this function
|
||||
* to check whether \c *this is approximately equal to the zero matrix or vector.
|
||||
* Indeed, \c isApprox(zero) returns false unless \c *this itself is exactly the zero matrix
|
||||
* or vector. If you want to test whether \c *this is zero, use internal::isMuchSmallerThan(const
|
||||
* RealScalar&, RealScalar) instead.
|
||||
*
|
||||
* \sa internal::isMuchSmallerThan(const RealScalar&, RealScalar) const
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
bool DenseBase<Derived>::isApprox(
|
||||
const DenseBase<OtherDerived>& other,
|
||||
const RealScalar& prec
|
||||
) const
|
||||
{
|
||||
return internal::isApprox_selector<Derived, OtherDerived>::run(derived(), other.derived(), prec);
|
||||
}
|
||||
|
||||
/** \returns \c true if the norm of \c *this is much smaller than \a other,
|
||||
* within the precision determined by \a prec.
|
||||
*
|
||||
* \note The fuzzy compares are done multiplicatively. A vector \f$ v \f$ is
|
||||
* considered to be much smaller than \f$ x \f$ within precision \f$ p \f$ if
|
||||
* \f[ \Vert v \Vert \leqslant p\,\vert x\vert. \f]
|
||||
*
|
||||
* For matrices, the comparison is done using the Hilbert-Schmidt norm. For this reason,
|
||||
* the value of the reference scalar \a other should come from the Hilbert-Schmidt norm
|
||||
* of a reference matrix of same dimensions.
|
||||
*
|
||||
* \sa isApprox(), isMuchSmallerThan(const DenseBase<OtherDerived>&, RealScalar) const
|
||||
*/
|
||||
template<typename Derived>
|
||||
bool DenseBase<Derived>::isMuchSmallerThan(
|
||||
const typename NumTraits<Scalar>::Real& other,
|
||||
const RealScalar& prec
|
||||
) const
|
||||
{
|
||||
return internal::isMuchSmallerThan_scalar_selector<Derived>::run(derived(), other, prec);
|
||||
}
|
||||
|
||||
/** \returns \c true if the norm of \c *this is much smaller than the norm of \a other,
|
||||
* within the precision determined by \a prec.
|
||||
*
|
||||
* \note The fuzzy compares are done multiplicatively. A vector \f$ v \f$ is
|
||||
* considered to be much smaller than a vector \f$ w \f$ within precision \f$ p \f$ if
|
||||
* \f[ \Vert v \Vert \leqslant p\,\Vert w\Vert. \f]
|
||||
* For matrices, the comparison is done using the Hilbert-Schmidt norm.
|
||||
*
|
||||
* \sa isApprox(), isMuchSmallerThan(const RealScalar&, RealScalar) const
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
bool DenseBase<Derived>::isMuchSmallerThan(
|
||||
const DenseBase<OtherDerived>& other,
|
||||
const RealScalar& prec
|
||||
) const
|
||||
{
|
||||
return internal::isMuchSmallerThan_object_selector<Derived, OtherDerived>::run(derived(), other.derived(), prec);
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_FUZZY_H
|
||||
455
external/include/eigen3/Eigen/src/Core/GeneralProduct.h
vendored
Normal file
455
external/include/eigen3/Eigen/src/Core/GeneralProduct.h
vendored
Normal file
@@ -0,0 +1,455 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_GENERAL_PRODUCT_H
|
||||
#define EIGEN_GENERAL_PRODUCT_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
enum {
|
||||
Large = 2,
|
||||
Small = 3
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<int Rows, int Cols, int Depth> struct product_type_selector;
|
||||
|
||||
template<int Size, int MaxSize> struct product_size_category
|
||||
{
|
||||
enum {
|
||||
#ifndef EIGEN_CUDA_ARCH
|
||||
is_large = MaxSize == Dynamic ||
|
||||
Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ||
|
||||
(Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD),
|
||||
#else
|
||||
is_large = 0,
|
||||
#endif
|
||||
value = is_large ? Large
|
||||
: Size == 1 ? 1
|
||||
: Small
|
||||
};
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs> struct product_type
|
||||
{
|
||||
typedef typename remove_all<Lhs>::type _Lhs;
|
||||
typedef typename remove_all<Rhs>::type _Rhs;
|
||||
enum {
|
||||
MaxRows = traits<_Lhs>::MaxRowsAtCompileTime,
|
||||
Rows = traits<_Lhs>::RowsAtCompileTime,
|
||||
MaxCols = traits<_Rhs>::MaxColsAtCompileTime,
|
||||
Cols = traits<_Rhs>::ColsAtCompileTime,
|
||||
MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::MaxColsAtCompileTime,
|
||||
traits<_Rhs>::MaxRowsAtCompileTime),
|
||||
Depth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::ColsAtCompileTime,
|
||||
traits<_Rhs>::RowsAtCompileTime)
|
||||
};
|
||||
|
||||
// the splitting into different lines of code here, introducing the _select enums and the typedef below,
|
||||
// is to work around an internal compiler error with gcc 4.1 and 4.2.
|
||||
private:
|
||||
enum {
|
||||
rows_select = product_size_category<Rows,MaxRows>::value,
|
||||
cols_select = product_size_category<Cols,MaxCols>::value,
|
||||
depth_select = product_size_category<Depth,MaxDepth>::value
|
||||
};
|
||||
typedef product_type_selector<rows_select, cols_select, depth_select> selector;
|
||||
|
||||
public:
|
||||
enum {
|
||||
value = selector::ret,
|
||||
ret = selector::ret
|
||||
};
|
||||
#ifdef EIGEN_DEBUG_PRODUCT
|
||||
static void debug()
|
||||
{
|
||||
EIGEN_DEBUG_VAR(Rows);
|
||||
EIGEN_DEBUG_VAR(Cols);
|
||||
EIGEN_DEBUG_VAR(Depth);
|
||||
EIGEN_DEBUG_VAR(rows_select);
|
||||
EIGEN_DEBUG_VAR(cols_select);
|
||||
EIGEN_DEBUG_VAR(depth_select);
|
||||
EIGEN_DEBUG_VAR(value);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
/* The following allows to select the kind of product at compile time
|
||||
* based on the three dimensions of the product.
|
||||
* This is a compile time mapping from {1,Small,Large}^3 -> {product types} */
|
||||
// FIXME I'm not sure the current mapping is the ideal one.
|
||||
template<int M, int N> struct product_type_selector<M,N,1> { enum { ret = OuterProduct }; };
|
||||
template<int M> struct product_type_selector<M, 1, 1> { enum { ret = LazyCoeffBasedProductMode }; };
|
||||
template<int N> struct product_type_selector<1, N, 1> { enum { ret = LazyCoeffBasedProductMode }; };
|
||||
template<int Depth> struct product_type_selector<1, 1, Depth> { enum { ret = InnerProduct }; };
|
||||
template<> struct product_type_selector<1, 1, 1> { enum { ret = InnerProduct }; };
|
||||
template<> struct product_type_selector<Small,1, Small> { enum { ret = CoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<1, Small,Small> { enum { ret = CoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<Small,Small,Small> { enum { ret = CoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<Small, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<Small, Large, 1> { enum { ret = LazyCoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<Large, Small, 1> { enum { ret = LazyCoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<1, Large,Small> { enum { ret = CoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<1, Large,Large> { enum { ret = GemvProduct }; };
|
||||
template<> struct product_type_selector<1, Small,Large> { enum { ret = CoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<Large,1, Small> { enum { ret = CoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<Large,1, Large> { enum { ret = GemvProduct }; };
|
||||
template<> struct product_type_selector<Small,1, Large> { enum { ret = CoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<Small,Small,Large> { enum { ret = GemmProduct }; };
|
||||
template<> struct product_type_selector<Large,Small,Large> { enum { ret = GemmProduct }; };
|
||||
template<> struct product_type_selector<Small,Large,Large> { enum { ret = GemmProduct }; };
|
||||
template<> struct product_type_selector<Large,Large,Large> { enum { ret = GemmProduct }; };
|
||||
template<> struct product_type_selector<Large,Small,Small> { enum { ret = CoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<Small,Large,Small> { enum { ret = CoeffBasedProductMode }; };
|
||||
template<> struct product_type_selector<Large,Large,Small> { enum { ret = GemmProduct }; };
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/***********************************************************************
|
||||
* Implementation of Inner Vector Vector Product
|
||||
***********************************************************************/
|
||||
|
||||
// FIXME : maybe the "inner product" could return a Scalar
|
||||
// instead of a 1x1 matrix ??
|
||||
// Pro: more natural for the user
|
||||
// Cons: this could be a problem if in a meta unrolled algorithm a matrix-matrix
|
||||
// product ends up to a row-vector times col-vector product... To tackle this use
|
||||
// case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x);
|
||||
|
||||
/***********************************************************************
|
||||
* Implementation of Outer Vector Vector Product
|
||||
***********************************************************************/
|
||||
|
||||
/***********************************************************************
|
||||
* Implementation of General Matrix Vector Product
|
||||
***********************************************************************/
|
||||
|
||||
/* According to the shape/flags of the matrix we have to distinghish 3 different cases:
|
||||
* 1 - the matrix is col-major, BLAS compatible and M is large => call fast BLAS-like colmajor routine
|
||||
* 2 - the matrix is row-major, BLAS compatible and N is large => call fast BLAS-like rowmajor routine
|
||||
* 3 - all other cases are handled using a simple loop along the outer-storage direction.
|
||||
* Therefore we need a lower level meta selector.
|
||||
* Furthermore, if the matrix is the rhs, then the product has to be transposed.
|
||||
*/
|
||||
namespace internal {
|
||||
|
||||
template<int Side, int StorageOrder, bool BlasCompatible>
|
||||
struct gemv_dense_selector;
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vector_if;
|
||||
|
||||
template<typename Scalar,int Size,int MaxSize>
|
||||
struct gemv_static_vector_if<Scalar,Size,MaxSize,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; }
|
||||
};
|
||||
|
||||
template<typename Scalar,int Size>
|
||||
struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Scalar* data() { return 0; }
|
||||
};
|
||||
|
||||
template<typename Scalar,int Size,int MaxSize>
|
||||
struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
|
||||
{
|
||||
enum {
|
||||
ForceAlignment = internal::packet_traits<Scalar>::Vectorizable,
|
||||
PacketSize = internal::packet_traits<Scalar>::size
|
||||
};
|
||||
#if EIGEN_MAX_STATIC_ALIGN_BYTES!=0
|
||||
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0,EIGEN_PLAIN_ENUM_MIN(AlignedMax,PacketSize)> m_data;
|
||||
EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
|
||||
#else
|
||||
// Some architectures cannot align on the stack,
|
||||
// => let's manually enforce alignment by allocating more data and return the address of the first aligned element.
|
||||
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?EIGEN_MAX_ALIGN_BYTES:0),0> m_data;
|
||||
EIGEN_STRONG_INLINE Scalar* data() {
|
||||
return ForceAlignment
|
||||
? reinterpret_cast<Scalar*>((internal::UIntPtr(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES)
|
||||
: m_data.array;
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
// The vector is on the left => transposition
|
||||
template<int StorageOrder, bool BlasCompatible>
|
||||
struct gemv_dense_selector<OnTheLeft,StorageOrder,BlasCompatible>
|
||||
{
|
||||
template<typename Lhs, typename Rhs, typename Dest>
|
||||
static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
|
||||
{
|
||||
Transpose<Dest> destT(dest);
|
||||
enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
|
||||
gemv_dense_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
|
||||
::run(rhs.transpose(), lhs.transpose(), destT, alpha);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
|
||||
{
|
||||
template<typename Lhs, typename Rhs, typename Dest>
|
||||
static inline void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
|
||||
{
|
||||
typedef typename Lhs::Scalar LhsScalar;
|
||||
typedef typename Rhs::Scalar RhsScalar;
|
||||
typedef typename Dest::Scalar ResScalar;
|
||||
typedef typename Dest::RealScalar RealScalar;
|
||||
|
||||
typedef internal::blas_traits<Lhs> LhsBlasTraits;
|
||||
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
|
||||
typedef internal::blas_traits<Rhs> RhsBlasTraits;
|
||||
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
|
||||
|
||||
typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest;
|
||||
|
||||
ActualLhsType actualLhs = LhsBlasTraits::extract(lhs);
|
||||
ActualRhsType actualRhs = RhsBlasTraits::extract(rhs);
|
||||
|
||||
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
|
||||
* RhsBlasTraits::extractScalarFactor(rhs);
|
||||
|
||||
// make sure Dest is a compile-time vector type (bug 1166)
|
||||
typedef typename conditional<Dest::IsVectorAtCompileTime, Dest, typename Dest::ColXpr>::type ActualDest;
|
||||
|
||||
enum {
|
||||
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
|
||||
// on, the other hand it is good for the cache to pack the vector anyways...
|
||||
EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1),
|
||||
ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
|
||||
MightCannotUseDest = (!EvalToDestAtCompileTime) || ComplexByReal
|
||||
};
|
||||
|
||||
typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;
|
||||
typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper;
|
||||
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
|
||||
|
||||
if(!MightCannotUseDest)
|
||||
{
|
||||
// shortcut if we are sure to be able to use dest directly,
|
||||
// this ease the compiler to generate cleaner and more optimzized code for most common cases
|
||||
general_matrix_vector_product
|
||||
<Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
|
||||
actualLhs.rows(), actualLhs.cols(),
|
||||
LhsMapper(actualLhs.data(), actualLhs.outerStride()),
|
||||
RhsMapper(actualRhs.data(), actualRhs.innerStride()),
|
||||
dest.data(), 1,
|
||||
compatibleAlpha);
|
||||
}
|
||||
else
|
||||
{
|
||||
gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
|
||||
|
||||
const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
|
||||
const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
|
||||
evalToDest ? dest.data() : static_dest.data());
|
||||
|
||||
if(!evalToDest)
|
||||
{
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
Index size = dest.size();
|
||||
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#endif
|
||||
if(!alphaIsCompatible)
|
||||
{
|
||||
MappedDest(actualDestPtr, dest.size()).setZero();
|
||||
compatibleAlpha = RhsScalar(1);
|
||||
}
|
||||
else
|
||||
MappedDest(actualDestPtr, dest.size()) = dest;
|
||||
}
|
||||
|
||||
general_matrix_vector_product
|
||||
<Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
|
||||
actualLhs.rows(), actualLhs.cols(),
|
||||
LhsMapper(actualLhs.data(), actualLhs.outerStride()),
|
||||
RhsMapper(actualRhs.data(), actualRhs.innerStride()),
|
||||
actualDestPtr, 1,
|
||||
compatibleAlpha);
|
||||
|
||||
if (!evalToDest)
|
||||
{
|
||||
if(!alphaIsCompatible)
|
||||
dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size());
|
||||
else
|
||||
dest = MappedDest(actualDestPtr, dest.size());
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct gemv_dense_selector<OnTheRight,RowMajor,true>
|
||||
{
|
||||
template<typename Lhs, typename Rhs, typename Dest>
|
||||
static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
|
||||
{
|
||||
typedef typename Lhs::Scalar LhsScalar;
|
||||
typedef typename Rhs::Scalar RhsScalar;
|
||||
typedef typename Dest::Scalar ResScalar;
|
||||
|
||||
typedef internal::blas_traits<Lhs> LhsBlasTraits;
|
||||
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
|
||||
typedef internal::blas_traits<Rhs> RhsBlasTraits;
|
||||
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
|
||||
typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
|
||||
|
||||
typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
|
||||
typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
|
||||
|
||||
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
|
||||
* RhsBlasTraits::extractScalarFactor(rhs);
|
||||
|
||||
enum {
|
||||
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
|
||||
// on, the other hand it is good for the cache to pack the vector anyways...
|
||||
DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1
|
||||
};
|
||||
|
||||
gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
|
||||
DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
|
||||
|
||||
if(!DirectlyUseRhs)
|
||||
{
|
||||
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
Index size = actualRhs.size();
|
||||
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
|
||||
#endif
|
||||
Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
|
||||
}
|
||||
|
||||
typedef const_blas_data_mapper<LhsScalar,Index,RowMajor> LhsMapper;
|
||||
typedef const_blas_data_mapper<RhsScalar,Index,ColMajor> RhsMapper;
|
||||
general_matrix_vector_product
|
||||
<Index,LhsScalar,LhsMapper,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
|
||||
actualLhs.rows(), actualLhs.cols(),
|
||||
LhsMapper(actualLhs.data(), actualLhs.outerStride()),
|
||||
RhsMapper(actualRhsPtr, 1),
|
||||
dest.data(), dest.col(0).innerStride(), //NOTE if dest is not a vector at compile-time, then dest.innerStride() might be wrong. (bug 1166)
|
||||
actualAlpha);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct gemv_dense_selector<OnTheRight,ColMajor,false>
|
||||
{
|
||||
template<typename Lhs, typename Rhs, typename Dest>
|
||||
static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||
// TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory, otherwise use a temp
|
||||
typename nested_eval<Rhs,1>::type actual_rhs(rhs);
|
||||
const Index size = rhs.rows();
|
||||
for(Index k=0; k<size; ++k)
|
||||
dest += (alpha*actual_rhs.coeff(k)) * lhs.col(k);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct gemv_dense_selector<OnTheRight,RowMajor,false>
|
||||
{
|
||||
template<typename Lhs, typename Rhs, typename Dest>
|
||||
static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||
typename nested_eval<Rhs,Lhs::RowsAtCompileTime>::type actual_rhs(rhs);
|
||||
const Index rows = dest.rows();
|
||||
for(Index i=0; i<rows; ++i)
|
||||
dest.coeffRef(i) += alpha * (lhs.row(i).cwiseProduct(actual_rhs.transpose())).sum();
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/***************************************************************************
|
||||
* Implementation of matrix base methods
|
||||
***************************************************************************/
|
||||
|
||||
/** \returns the matrix product of \c *this and \a other.
|
||||
*
|
||||
* \note If instead of the matrix product you want the coefficient-wise product, see Cwise::operator*().
|
||||
*
|
||||
* \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
inline const Product<Derived, OtherDerived>
|
||||
MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
|
||||
{
|
||||
// A note regarding the function declaration: In MSVC, this function will sometimes
|
||||
// not be inlined since DenseStorage is an unwindable object for dynamic
|
||||
// matrices and product types are holding a member to store the result.
|
||||
// Thus it does not help tagging this function with EIGEN_STRONG_INLINE.
|
||||
enum {
|
||||
ProductIsValid = Derived::ColsAtCompileTime==Dynamic
|
||||
|| OtherDerived::RowsAtCompileTime==Dynamic
|
||||
|| int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
|
||||
AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
|
||||
SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
|
||||
};
|
||||
// note to the lost user:
|
||||
// * for a dot product use: v1.dot(v2)
|
||||
// * for a coeff-wise product use: v1.cwiseProduct(v2)
|
||||
EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
|
||||
INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
|
||||
EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
|
||||
INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
|
||||
EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
|
||||
#ifdef EIGEN_DEBUG_PRODUCT
|
||||
internal::product_type<Derived,OtherDerived>::debug();
|
||||
#endif
|
||||
|
||||
return Product<Derived, OtherDerived>(derived(), other.derived());
|
||||
}
|
||||
|
||||
/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
|
||||
*
|
||||
* The returned product will behave like any other expressions: the coefficients of the product will be
|
||||
* computed once at a time as requested. This might be useful in some extremely rare cases when only
|
||||
* a small and no coherent fraction of the result's coefficients have to be computed.
|
||||
*
|
||||
* \warning This version of the matrix product can be much much slower. So use it only if you know
|
||||
* what you are doing and that you measured a true speed improvement.
|
||||
*
|
||||
* \sa operator*(const MatrixBase&)
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
const Product<Derived,OtherDerived,LazyProduct>
|
||||
MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
|
||||
{
|
||||
enum {
|
||||
ProductIsValid = Derived::ColsAtCompileTime==Dynamic
|
||||
|| OtherDerived::RowsAtCompileTime==Dynamic
|
||||
|| int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
|
||||
AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
|
||||
SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
|
||||
};
|
||||
// note to the lost user:
|
||||
// * for a dot product use: v1.dot(v2)
|
||||
// * for a coeff-wise product use: v1.cwiseProduct(v2)
|
||||
EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
|
||||
INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
|
||||
EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
|
||||
INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
|
||||
EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
|
||||
|
||||
return Product<Derived,OtherDerived,LazyProduct>(derived(), other.derived());
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_PRODUCT_H
|
||||
593
external/include/eigen3/Eigen/src/Core/GenericPacketMath.h
vendored
Normal file
593
external/include/eigen3/Eigen/src/Core/GenericPacketMath.h
vendored
Normal file
@@ -0,0 +1,593 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_GENERIC_PACKET_MATH_H
|
||||
#define EIGEN_GENERIC_PACKET_MATH_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
/** \internal
|
||||
* \file GenericPacketMath.h
|
||||
*
|
||||
* Default implementation for types not supported by the vectorization.
|
||||
* In practice these functions are provided to make easier the writing
|
||||
* of generic vectorized code.
|
||||
*/
|
||||
|
||||
#ifndef EIGEN_DEBUG_ALIGNED_LOAD
|
||||
#define EIGEN_DEBUG_ALIGNED_LOAD
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_DEBUG_UNALIGNED_LOAD
|
||||
#define EIGEN_DEBUG_UNALIGNED_LOAD
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_DEBUG_ALIGNED_STORE
|
||||
#define EIGEN_DEBUG_ALIGNED_STORE
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_DEBUG_UNALIGNED_STORE
|
||||
#define EIGEN_DEBUG_UNALIGNED_STORE
|
||||
#endif
|
||||
|
||||
struct default_packet_traits
|
||||
{
|
||||
enum {
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasNegate = 1,
|
||||
HasAbs = 1,
|
||||
HasArg = 0,
|
||||
HasAbs2 = 1,
|
||||
HasMin = 1,
|
||||
HasMax = 1,
|
||||
HasConj = 1,
|
||||
HasSetLinear = 1,
|
||||
HasBlend = 0,
|
||||
|
||||
HasDiv = 0,
|
||||
HasSqrt = 0,
|
||||
HasRsqrt = 0,
|
||||
HasExp = 0,
|
||||
HasLog = 0,
|
||||
HasLog1p = 0,
|
||||
HasLog10 = 0,
|
||||
HasPow = 0,
|
||||
|
||||
HasSin = 0,
|
||||
HasCos = 0,
|
||||
HasTan = 0,
|
||||
HasASin = 0,
|
||||
HasACos = 0,
|
||||
HasATan = 0,
|
||||
HasSinh = 0,
|
||||
HasCosh = 0,
|
||||
HasTanh = 0,
|
||||
HasLGamma = 0,
|
||||
HasDiGamma = 0,
|
||||
HasZeta = 0,
|
||||
HasPolygamma = 0,
|
||||
HasErf = 0,
|
||||
HasErfc = 0,
|
||||
HasIGamma = 0,
|
||||
HasIGammac = 0,
|
||||
HasBetaInc = 0,
|
||||
|
||||
HasRound = 0,
|
||||
HasFloor = 0,
|
||||
HasCeil = 0,
|
||||
|
||||
HasSign = 0
|
||||
};
|
||||
};
|
||||
|
||||
template<typename T> struct packet_traits : default_packet_traits
|
||||
{
|
||||
typedef T type;
|
||||
typedef T half;
|
||||
enum {
|
||||
Vectorizable = 0,
|
||||
size = 1,
|
||||
AlignedOnScalar = 0,
|
||||
HasHalfPacket = 0
|
||||
};
|
||||
enum {
|
||||
HasAdd = 0,
|
||||
HasSub = 0,
|
||||
HasMul = 0,
|
||||
HasNegate = 0,
|
||||
HasAbs = 0,
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
HasConj = 0,
|
||||
HasSetLinear = 0
|
||||
};
|
||||
};
|
||||
|
||||
template<typename T> struct packet_traits<const T> : packet_traits<T> { };
|
||||
|
||||
template <typename Src, typename Tgt> struct type_casting_traits {
|
||||
enum {
|
||||
VectorizedCast = 0,
|
||||
SrcCoeffRatio = 1,
|
||||
TgtCoeffRatio = 1
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
/** \internal \returns static_cast<TgtType>(a) (coeff-wise) */
|
||||
template <typename SrcPacket, typename TgtPacket>
|
||||
EIGEN_DEVICE_FUNC inline TgtPacket
|
||||
pcast(const SrcPacket& a) {
|
||||
return static_cast<TgtPacket>(a);
|
||||
}
|
||||
template <typename SrcPacket, typename TgtPacket>
|
||||
EIGEN_DEVICE_FUNC inline TgtPacket
|
||||
pcast(const SrcPacket& a, const SrcPacket& /*b*/) {
|
||||
return static_cast<TgtPacket>(a);
|
||||
}
|
||||
|
||||
template <typename SrcPacket, typename TgtPacket>
|
||||
EIGEN_DEVICE_FUNC inline TgtPacket
|
||||
pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/) {
|
||||
return static_cast<TgtPacket>(a);
|
||||
}
|
||||
|
||||
/** \internal \returns a + b (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
padd(const Packet& a,
|
||||
const Packet& b) { return a+b; }
|
||||
|
||||
/** \internal \returns a - b (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
psub(const Packet& a,
|
||||
const Packet& b) { return a-b; }
|
||||
|
||||
/** \internal \returns -a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pnegate(const Packet& a) { return -a; }
|
||||
|
||||
/** \internal \returns conj(a) (coeff-wise) */
|
||||
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pconj(const Packet& a) { return numext::conj(a); }
|
||||
|
||||
/** \internal \returns a * b (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pmul(const Packet& a,
|
||||
const Packet& b) { return a*b; }
|
||||
|
||||
/** \internal \returns a / b (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pdiv(const Packet& a,
|
||||
const Packet& b) { return a/b; }
|
||||
|
||||
/** \internal \returns the min of \a a and \a b (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pmin(const Packet& a,
|
||||
const Packet& b) { return numext::mini(a, b); }
|
||||
|
||||
/** \internal \returns the max of \a a and \a b (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pmax(const Packet& a,
|
||||
const Packet& b) { return numext::maxi(a, b); }
|
||||
|
||||
/** \internal \returns the absolute value of \a a */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pabs(const Packet& a) { using std::abs; return abs(a); }
|
||||
|
||||
/** \internal \returns the phase angle of \a a */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
parg(const Packet& a) { using numext::arg; return arg(a); }
|
||||
|
||||
/** \internal \returns the bitwise and of \a a and \a b */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pand(const Packet& a, const Packet& b) { return a & b; }
|
||||
|
||||
/** \internal \returns the bitwise or of \a a and \a b */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
por(const Packet& a, const Packet& b) { return a | b; }
|
||||
|
||||
/** \internal \returns the bitwise xor of \a a and \a b */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pxor(const Packet& a, const Packet& b) { return a ^ b; }
|
||||
|
||||
/** \internal \returns the bitwise andnot of \a a and \a b */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pandnot(const Packet& a, const Packet& b) { return a & (!b); }
|
||||
|
||||
/** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
|
||||
|
||||
/** \internal \returns a packet version of \a *from, (un-aligned load) */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
|
||||
|
||||
/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
|
||||
|
||||
/** \internal \returns a packet with constant coefficients \a a[0], e.g.: (a[0],a[0],a[0],a[0]) */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pload1(const typename unpacket_traits<Packet>::type *a) { return pset1<Packet>(*a); }
|
||||
|
||||
/** \internal \returns a packet with elements of \a *from duplicated.
|
||||
* For instance, for a packet of 8 elements, 4 scalars will be read from \a *from and
|
||||
* duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]}
|
||||
* Currently, this function is only used for scalar * complex products.
|
||||
*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
|
||||
ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
|
||||
|
||||
/** \internal \returns a packet with elements of \a *from quadrupled.
|
||||
* For instance, for a packet of 8 elements, 2 scalars will be read from \a *from and
|
||||
* replicated to form: {from[0],from[0],from[0],from[0],from[1],from[1],from[1],from[1]}
|
||||
* Currently, this function is only used in matrix products.
|
||||
* For packet-size smaller or equal to 4, this function is equivalent to pload1
|
||||
*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
ploadquad(const typename unpacket_traits<Packet>::type* from)
|
||||
{ return pload1<Packet>(from); }
|
||||
|
||||
/** \internal equivalent to
|
||||
* \code
|
||||
* a0 = pload1(a+0);
|
||||
* a1 = pload1(a+1);
|
||||
* a2 = pload1(a+2);
|
||||
* a3 = pload1(a+3);
|
||||
* \endcode
|
||||
* \sa pset1, pload1, ploaddup, pbroadcast2
|
||||
*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC
|
||||
inline void pbroadcast4(const typename unpacket_traits<Packet>::type *a,
|
||||
Packet& a0, Packet& a1, Packet& a2, Packet& a3)
|
||||
{
|
||||
a0 = pload1<Packet>(a+0);
|
||||
a1 = pload1<Packet>(a+1);
|
||||
a2 = pload1<Packet>(a+2);
|
||||
a3 = pload1<Packet>(a+3);
|
||||
}
|
||||
|
||||
/** \internal equivalent to
|
||||
* \code
|
||||
* a0 = pload1(a+0);
|
||||
* a1 = pload1(a+1);
|
||||
* \endcode
|
||||
* \sa pset1, pload1, ploaddup, pbroadcast4
|
||||
*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC
|
||||
inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
|
||||
Packet& a0, Packet& a1)
|
||||
{
|
||||
a0 = pload1<Packet>(a+0);
|
||||
a1 = pload1<Packet>(a+1);
|
||||
}
|
||||
|
||||
/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
|
||||
plset(const typename unpacket_traits<Packet>::type& a) { return a; }
|
||||
|
||||
/** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
|
||||
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from)
|
||||
{ (*to) = from; }
|
||||
|
||||
/** \internal copy the packet \a from to \a *to, (un-aligned store) */
|
||||
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from)
|
||||
{ (*to) = from; }
|
||||
|
||||
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/)
|
||||
{ return ploadu<Packet>(from); }
|
||||
|
||||
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/)
|
||||
{ pstore(to, from); }
|
||||
|
||||
/** \internal tries to do cache prefetching of \a addr */
|
||||
template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr)
|
||||
{
|
||||
#ifdef __CUDA_ARCH__
|
||||
#if defined(__LP64__)
|
||||
// 64-bit pointer operand constraint for inlined asm
|
||||
asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
|
||||
#else
|
||||
// 32-bit pointer operand constraint for inlined asm
|
||||
asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr));
|
||||
#endif
|
||||
#elif (!EIGEN_COMP_MSVC) && (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_ICC)
|
||||
__builtin_prefetch(addr);
|
||||
#endif
|
||||
}
|
||||
|
||||
/** \internal \returns the first element of a packet */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
|
||||
{ return a; }
|
||||
|
||||
/** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i] */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
preduxp(const Packet* vecs) { return vecs[0]; }
|
||||
|
||||
/** \internal \returns the sum of the elements of \a a*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a)
|
||||
{ return a; }
|
||||
|
||||
/** \internal \returns the sum of the elements of \a a by block of 4 elements.
|
||||
* For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7}
|
||||
* For packet-size smaller or equal to 4, this boils down to a noop.
|
||||
*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline
|
||||
typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
|
||||
predux_downto4(const Packet& a)
|
||||
{ return a; }
|
||||
|
||||
/** \internal \returns the product of the elements of \a a*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
|
||||
{ return a; }
|
||||
|
||||
/** \internal \returns the min of the elements of \a a*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
|
||||
{ return a; }
|
||||
|
||||
/** \internal \returns the max of the elements of \a a*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
|
||||
{ return a; }
|
||||
|
||||
/** \internal \returns the reversed elements of \a a*/
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)
|
||||
{ return a; }
|
||||
|
||||
/** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
|
||||
{
|
||||
// FIXME: uncomment the following in case we drop the internal imag and real functions.
|
||||
// using std::imag;
|
||||
// using std::real;
|
||||
return Packet(imag(a),real(a));
|
||||
}
|
||||
|
||||
/**************************
|
||||
* Special math functions
|
||||
***************************/
|
||||
|
||||
/** \internal \returns the sine of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet psin(const Packet& a) { using std::sin; return sin(a); }
|
||||
|
||||
/** \internal \returns the cosine of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet pcos(const Packet& a) { using std::cos; return cos(a); }
|
||||
|
||||
/** \internal \returns the tan of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet ptan(const Packet& a) { using std::tan; return tan(a); }
|
||||
|
||||
/** \internal \returns the arc sine of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet pasin(const Packet& a) { using std::asin; return asin(a); }
|
||||
|
||||
/** \internal \returns the arc cosine of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet pacos(const Packet& a) { using std::acos; return acos(a); }
|
||||
|
||||
/** \internal \returns the arc tangent of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet patan(const Packet& a) { using std::atan; return atan(a); }
|
||||
|
||||
/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet psinh(const Packet& a) { using std::sinh; return sinh(a); }
|
||||
|
||||
/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet pcosh(const Packet& a) { using std::cosh; return cosh(a); }
|
||||
|
||||
/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); }
|
||||
|
||||
/** \internal \returns the exp of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet pexp(const Packet& a) { using std::exp; return exp(a); }
|
||||
|
||||
/** \internal \returns the log of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet plog(const Packet& a) { using std::log; return log(a); }
|
||||
|
||||
/** \internal \returns the log1p of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet plog1p(const Packet& a) { return numext::log1p(a); }
|
||||
|
||||
/** \internal \returns the log10 of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet plog10(const Packet& a) { using std::log10; return log10(a); }
|
||||
|
||||
/** \internal \returns the square-root of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); }
|
||||
|
||||
/** \internal \returns the reciprocal square-root of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet prsqrt(const Packet& a) {
|
||||
return pdiv(pset1<Packet>(1), psqrt(a));
|
||||
}
|
||||
|
||||
/** \internal \returns the rounded value of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet pround(const Packet& a) { using numext::round; return round(a); }
|
||||
|
||||
/** \internal \returns the floor of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet pfloor(const Packet& a) { using numext::floor; return floor(a); }
|
||||
|
||||
/** \internal \returns the ceil of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }
|
||||
|
||||
/***************************************************************************
|
||||
* The following functions might not have to be overwritten for vectorized types
|
||||
***************************************************************************/
|
||||
|
||||
/** \internal copy a packet with constant coeficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned */
|
||||
// NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type)
|
||||
template<typename Packet>
|
||||
inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a)
|
||||
{
|
||||
pstore(to, pset1<Packet>(a));
|
||||
}
|
||||
|
||||
/** \internal \returns a * b + c (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pmadd(const Packet& a,
|
||||
const Packet& b,
|
||||
const Packet& c)
|
||||
{ return padd(pmul(a, b),c); }
|
||||
|
||||
/** \internal \returns a packet version of \a *from.
|
||||
* The pointer \a from must be aligned on a \a Alignment bytes boundary. */
|
||||
template<typename Packet, int Alignment>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from)
|
||||
{
|
||||
if(Alignment >= unpacket_traits<Packet>::alignment)
|
||||
return pload<Packet>(from);
|
||||
else
|
||||
return ploadu<Packet>(from);
|
||||
}
|
||||
|
||||
/** \internal copy the packet \a from to \a *to.
|
||||
* The pointer \a from must be aligned on a \a Alignment bytes boundary. */
|
||||
template<typename Scalar, typename Packet, int Alignment>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from)
|
||||
{
|
||||
if(Alignment >= unpacket_traits<Packet>::alignment)
|
||||
pstore(to, from);
|
||||
else
|
||||
pstoreu(to, from);
|
||||
}
|
||||
|
||||
/** \internal \returns a packet version of \a *from.
|
||||
* Unlike ploadt, ploadt_ro takes advantage of the read-only memory path on the
|
||||
* hardware if available to speedup the loading of data that won't be modified
|
||||
* by the current computation.
|
||||
*/
|
||||
template<typename Packet, int LoadMode>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)
|
||||
{
|
||||
return ploadt<Packet, LoadMode>(from);
|
||||
}
|
||||
|
||||
/** \internal default implementation of palign() allowing partial specialization */
|
||||
template<int Offset,typename PacketType>
|
||||
struct palign_impl
|
||||
{
|
||||
// by default data are aligned, so there is nothing to be done :)
|
||||
static inline void run(PacketType&, const PacketType&) {}
|
||||
};
|
||||
|
||||
/** \internal update \a first using the concatenation of the packet_size minus \a Offset last elements
|
||||
* of \a first and \a Offset first elements of \a second.
|
||||
*
|
||||
* This function is currently only used to optimize matrix-vector products on unligned matrices.
|
||||
* It takes 2 packets that represent a contiguous memory array, and returns a packet starting
|
||||
* at the position \a Offset. For instance, for packets of 4 elements, we have:
|
||||
* Input:
|
||||
* - first = {f0,f1,f2,f3}
|
||||
* - second = {s0,s1,s2,s3}
|
||||
* Output:
|
||||
* - if Offset==0 then {f0,f1,f2,f3}
|
||||
* - if Offset==1 then {f1,f2,f3,s0}
|
||||
* - if Offset==2 then {f2,f3,s0,s1}
|
||||
* - if Offset==3 then {f3,s0,s1,s3}
|
||||
*/
|
||||
template<int Offset,typename PacketType>
|
||||
inline void palign(PacketType& first, const PacketType& second)
|
||||
{
|
||||
palign_impl<Offset,PacketType>::run(first,second);
|
||||
}
|
||||
|
||||
/***************************************************************************
|
||||
* Fast complex products (GCC generates a function call which is very slow)
|
||||
***************************************************************************/
|
||||
|
||||
// Eigen+CUDA does not support complexes.
|
||||
#ifndef __CUDACC__
|
||||
|
||||
template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
|
||||
{ return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
|
||||
|
||||
template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
|
||||
{ return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
* PacketBlock, that is a collection of N packets where the number of words
|
||||
* in the packet is a multiple of N.
|
||||
***************************************************************************/
|
||||
template <typename Packet,int N=unpacket_traits<Packet>::size> struct PacketBlock {
|
||||
Packet packet[N];
|
||||
};
|
||||
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet,1>& /*kernel*/) {
|
||||
// Nothing to do in the scalar case, i.e. a 1x1 matrix.
|
||||
}
|
||||
|
||||
/***************************************************************************
|
||||
* Selector, i.e. vector of N boolean values used to select (i.e. blend)
|
||||
* words from 2 packets.
|
||||
***************************************************************************/
|
||||
template <size_t N> struct Selector {
|
||||
bool select[N];
|
||||
};
|
||||
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) {
|
||||
return ifPacket.select[0] ? thenPacket : elsePacket;
|
||||
}
|
||||
|
||||
/** \internal \returns \a a with the first coefficient replaced by the scalar b */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pinsertfirst(const Packet& a, typename unpacket_traits<Packet>::type b)
|
||||
{
|
||||
// Default implementation based on pblend.
|
||||
// It must be specialized for higher performance.
|
||||
Selector<unpacket_traits<Packet>::size> mask;
|
||||
mask.select[0] = true;
|
||||
// This for loop should be optimized away by the compiler.
|
||||
for(Index i=1; i<unpacket_traits<Packet>::size; ++i)
|
||||
mask.select[i] = false;
|
||||
return pblend(mask, pset1<Packet>(b), a);
|
||||
}
|
||||
|
||||
/** \internal \returns \a a with the last coefficient replaced by the scalar b */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pinsertlast(const Packet& a, typename unpacket_traits<Packet>::type b)
|
||||
{
|
||||
// Default implementation based on pblend.
|
||||
// It must be specialized for higher performance.
|
||||
Selector<unpacket_traits<Packet>::size> mask;
|
||||
// This for loop should be optimized away by the compiler.
|
||||
for(Index i=0; i<unpacket_traits<Packet>::size-1; ++i)
|
||||
mask.select[i] = false;
|
||||
mask.select[unpacket_traits<Packet>::size-1] = true;
|
||||
return pblend(mask, pset1<Packet>(b), a);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_GENERIC_PACKET_MATH_H
|
||||
187
external/include/eigen3/Eigen/src/Core/GlobalFunctions.h
vendored
Normal file
187
external/include/eigen3/Eigen/src/Core/GlobalFunctions.h
vendored
Normal file
@@ -0,0 +1,187 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2010-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2010 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_GLOBAL_FUNCTIONS_H
|
||||
#define EIGEN_GLOBAL_FUNCTIONS_H
|
||||
|
||||
#ifdef EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR,DOC_OP,DOC_DETAILS) \
|
||||
/** \returns an expression of the coefficient-wise DOC_OP of \a x
|
||||
|
||||
DOC_DETAILS
|
||||
|
||||
\sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_##NAME">Math functions</a>, class CwiseUnaryOp
|
||||
*/ \
|
||||
template<typename Derived> \
|
||||
inline const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived> \
|
||||
NAME(const Eigen::ArrayBase<Derived>& x);
|
||||
|
||||
#else
|
||||
|
||||
#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR,DOC_OP,DOC_DETAILS) \
|
||||
template<typename Derived> \
|
||||
inline const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived> \
|
||||
(NAME)(const Eigen::ArrayBase<Derived>& x) { \
|
||||
return Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived>(x.derived()); \
|
||||
}
|
||||
|
||||
#endif // EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
#define EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(NAME,FUNCTOR) \
|
||||
\
|
||||
template<typename Derived> \
|
||||
struct NAME##_retval<ArrayBase<Derived> > \
|
||||
{ \
|
||||
typedef const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived> type; \
|
||||
}; \
|
||||
template<typename Derived> \
|
||||
struct NAME##_impl<ArrayBase<Derived> > \
|
||||
{ \
|
||||
static inline typename NAME##_retval<ArrayBase<Derived> >::type run(const Eigen::ArrayBase<Derived>& x) \
|
||||
{ \
|
||||
return typename NAME##_retval<ArrayBase<Derived> >::type(x.derived()); \
|
||||
} \
|
||||
};
|
||||
|
||||
namespace Eigen
|
||||
{
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(real,scalar_real_op,real part,\sa ArrayBase::real)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(imag,scalar_imag_op,imaginary part,\sa ArrayBase::imag)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(conj,scalar_conjugate_op,complex conjugate,\sa ArrayBase::conjugate)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(inverse,scalar_inverse_op,inverse,\sa ArrayBase::inverse)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sin,scalar_sin_op,sine,\sa ArrayBase::sin)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cos,scalar_cos_op,cosine,\sa ArrayBase::cos)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan,scalar_tan_op,tangent,\sa ArrayBase::tan)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atan,scalar_atan_op,arc-tangent,\sa ArrayBase::atan)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asin,scalar_asin_op,arc-sine,\sa ArrayBase::asin)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acos,scalar_acos_op,arc-consine,\sa ArrayBase::acos)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op,hyperbolic sine,\sa ArrayBase::sinh)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op,hyperbolic cosine,\sa ArrayBase::cosh)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op,hyperbolic tangent,\sa ArrayBase::tanh)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op,natural logarithm of the gamma function,\sa ArrayBase::lgamma)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op,derivative of lgamma,\sa ArrayBase::digamma)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op,error function,\sa ArrayBase::erf)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op,complement error function,\sa ArrayBase::erfc)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op,exponential,\sa ArrayBase::exp)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op,natural logarithm,\sa Eigen::log10 DOXCOMMA ArrayBase::log)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log1p,scalar_log1p_op,natural logarithm of 1 plus the value,\sa ArrayBase::log1p)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op,base 10 logarithm,\sa Eigen::log DOXCOMMA ArrayBase::log)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs,scalar_abs_op,absolute value,\sa ArrayBase::abs DOXCOMMA MatrixBase::cwiseAbs)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs2,scalar_abs2_op,squared absolute value,\sa ArrayBase::abs2 DOXCOMMA MatrixBase::cwiseAbs2)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(arg,scalar_arg_op,complex argument,\sa ArrayBase::arg)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sqrt,scalar_sqrt_op,square root,\sa ArrayBase::sqrt DOXCOMMA MatrixBase::cwiseSqrt)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(rsqrt,scalar_rsqrt_op,reciprocal square root,\sa ArrayBase::rsqrt)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(square,scalar_square_op,square (power 2),\sa Eigen::abs2 DOXCOMMA Eigen::pow DOXCOMMA ArrayBase::square)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cube,scalar_cube_op,cube (power 3),\sa Eigen::pow DOXCOMMA ArrayBase::cube)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(round,scalar_round_op,nearest integer,\sa Eigen::floor DOXCOMMA Eigen::ceil DOXCOMMA ArrayBase::round)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(floor,scalar_floor_op,nearest integer not greater than the giben value,\sa Eigen::ceil DOXCOMMA ArrayBase::floor)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ceil,scalar_ceil_op,nearest integer not less than the giben value,\sa Eigen::floor DOXCOMMA ArrayBase::ceil)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isnan,scalar_isnan_op,not-a-number test,\sa Eigen::isinf DOXCOMMA Eigen::isfinite DOXCOMMA ArrayBase::isnan)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isinf,scalar_isinf_op,infinite value test,\sa Eigen::isnan DOXCOMMA Eigen::isfinite DOXCOMMA ArrayBase::isinf)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isfinite,scalar_isfinite_op,finite value test,\sa Eigen::isinf DOXCOMMA Eigen::isnan DOXCOMMA ArrayBase::isfinite)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sign,scalar_sign_op,sign (or 0),\sa ArrayBase::sign)
|
||||
|
||||
/** \returns an expression of the coefficient-wise power of \a x to the given constant \a exponent.
|
||||
*
|
||||
* \tparam ScalarExponent is the scalar type of \a exponent. It must be compatible with the scalar type of the given expression (\c Derived::Scalar).
|
||||
*
|
||||
* \sa ArrayBase::pow()
|
||||
*
|
||||
* \relates ArrayBase
|
||||
*/
|
||||
#ifdef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename Derived,typename ScalarExponent>
|
||||
inline const CwiseBinaryOp<internal::scalar_pow_op<Derived::Scalar,ScalarExponent>,Derived,Constant<ScalarExponent> >
|
||||
pow(const Eigen::ArrayBase<Derived>& x, const ScalarExponent& exponent);
|
||||
#else
|
||||
template<typename Derived,typename ScalarExponent>
|
||||
inline typename internal::enable_if< !(internal::is_same<typename Derived::Scalar,ScalarExponent>::value) && EIGEN_SCALAR_BINARY_SUPPORTED(pow,typename Derived::Scalar,ScalarExponent),
|
||||
const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,ScalarExponent,pow) >::type
|
||||
pow(const Eigen::ArrayBase<Derived>& x, const ScalarExponent& exponent) {
|
||||
return x.derived().pow(exponent);
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
inline const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename Derived::Scalar,pow)
|
||||
pow(const Eigen::ArrayBase<Derived>& x, const typename Derived::Scalar& exponent) {
|
||||
return x.derived().pow(exponent);
|
||||
}
|
||||
#endif
|
||||
|
||||
/** \returns an expression of the coefficient-wise power of \a x to the given array of \a exponents.
|
||||
*
|
||||
* This function computes the coefficient-wise power.
|
||||
*
|
||||
* Example: \include Cwise_array_power_array.cpp
|
||||
* Output: \verbinclude Cwise_array_power_array.out
|
||||
*
|
||||
* \sa ArrayBase::pow()
|
||||
*
|
||||
* \relates ArrayBase
|
||||
*/
|
||||
template<typename Derived,typename ExponentDerived>
|
||||
inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>
|
||||
pow(const Eigen::ArrayBase<Derived>& x, const Eigen::ArrayBase<ExponentDerived>& exponents)
|
||||
{
|
||||
return Eigen::CwiseBinaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>(
|
||||
x.derived(),
|
||||
exponents.derived()
|
||||
);
|
||||
}
|
||||
|
||||
/** \returns an expression of the coefficient-wise power of the scalar \a x to the given array of \a exponents.
|
||||
*
|
||||
* This function computes the coefficient-wise power between a scalar and an array of exponents.
|
||||
*
|
||||
* \tparam Scalar is the scalar type of \a x. It must be compatible with the scalar type of the given array expression (\c Derived::Scalar).
|
||||
*
|
||||
* Example: \include Cwise_scalar_power_array.cpp
|
||||
* Output: \verbinclude Cwise_scalar_power_array.out
|
||||
*
|
||||
* \sa ArrayBase::pow()
|
||||
*
|
||||
* \relates ArrayBase
|
||||
*/
|
||||
#ifdef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename Scalar,typename Derived>
|
||||
inline const CwiseBinaryOp<internal::scalar_pow_op<Scalar,Derived::Scalar>,Constant<Scalar>,Derived>
|
||||
pow(const Scalar& x,const Eigen::ArrayBase<Derived>& x);
|
||||
#else
|
||||
template<typename Scalar, typename Derived>
|
||||
inline typename internal::enable_if< !(internal::is_same<typename Derived::Scalar,Scalar>::value) && EIGEN_SCALAR_BINARY_SUPPORTED(pow,Scalar,typename Derived::Scalar),
|
||||
const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,pow) >::type
|
||||
pow(const Scalar& x, const Eigen::ArrayBase<Derived>& exponents)
|
||||
{
|
||||
return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,pow)(
|
||||
typename internal::plain_constant_type<Derived,Scalar>::type(exponents.rows(), exponents.cols(), x), exponents.derived() );
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
inline const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,pow)
|
||||
pow(const typename Derived::Scalar& x, const Eigen::ArrayBase<Derived>& exponents)
|
||||
{
|
||||
return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,pow)(
|
||||
typename internal::plain_constant_type<Derived,typename Derived::Scalar>::type(exponents.rows(), exponents.cols(), x), exponents.derived() );
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
namespace internal
|
||||
{
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(real,scalar_real_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(imag,scalar_imag_op)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(abs2,scalar_abs2_op)
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: cleanly disable those functions that are not supported on Array (numext::real_ref, internal::random, internal::isApprox...)
|
||||
|
||||
#endif // EIGEN_GLOBAL_FUNCTIONS_H
|
||||
225
external/include/eigen3/Eigen/src/Core/IO.h
vendored
Normal file
225
external/include/eigen3/Eigen/src/Core/IO.h
vendored
Normal file
@@ -0,0 +1,225 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_IO_H
|
||||
#define EIGEN_IO_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
enum { DontAlignCols = 1 };
|
||||
enum { StreamPrecision = -1,
|
||||
FullPrecision = -2 };
|
||||
|
||||
namespace internal {
|
||||
template<typename Derived>
|
||||
std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat& fmt);
|
||||
}
|
||||
|
||||
/** \class IOFormat
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Stores a set of parameters controlling the way matrices are printed
|
||||
*
|
||||
* List of available parameters:
|
||||
* - \b precision number of digits for floating point values, or one of the special constants \c StreamPrecision and \c FullPrecision.
|
||||
* The default is the special value \c StreamPrecision which means to use the
|
||||
* stream's own precision setting, as set for instance using \c cout.precision(3). The other special value
|
||||
* \c FullPrecision means that the number of digits will be computed to match the full precision of each floating-point
|
||||
* type.
|
||||
* - \b flags an OR-ed combination of flags, the default value is 0, the only currently available flag is \c DontAlignCols which
|
||||
* allows to disable the alignment of columns, resulting in faster code.
|
||||
* - \b coeffSeparator string printed between two coefficients of the same row
|
||||
* - \b rowSeparator string printed between two rows
|
||||
* - \b rowPrefix string printed at the beginning of each row
|
||||
* - \b rowSuffix string printed at the end of each row
|
||||
* - \b matPrefix string printed at the beginning of the matrix
|
||||
* - \b matSuffix string printed at the end of the matrix
|
||||
*
|
||||
* Example: \include IOFormat.cpp
|
||||
* Output: \verbinclude IOFormat.out
|
||||
*
|
||||
* \sa DenseBase::format(), class WithFormat
|
||||
*/
|
||||
struct IOFormat
|
||||
{
|
||||
/** Default constructor, see class IOFormat for the meaning of the parameters */
|
||||
IOFormat(int _precision = StreamPrecision, int _flags = 0,
|
||||
const std::string& _coeffSeparator = " ",
|
||||
const std::string& _rowSeparator = "\n", const std::string& _rowPrefix="", const std::string& _rowSuffix="",
|
||||
const std::string& _matPrefix="", const std::string& _matSuffix="")
|
||||
: matPrefix(_matPrefix), matSuffix(_matSuffix), rowPrefix(_rowPrefix), rowSuffix(_rowSuffix), rowSeparator(_rowSeparator),
|
||||
rowSpacer(""), coeffSeparator(_coeffSeparator), precision(_precision), flags(_flags)
|
||||
{
|
||||
// TODO check if rowPrefix, rowSuffix or rowSeparator contains a newline
|
||||
// don't add rowSpacer if columns are not to be aligned
|
||||
if((flags & DontAlignCols))
|
||||
return;
|
||||
int i = int(matSuffix.length())-1;
|
||||
while (i>=0 && matSuffix[i]!='\n')
|
||||
{
|
||||
rowSpacer += ' ';
|
||||
i--;
|
||||
}
|
||||
}
|
||||
std::string matPrefix, matSuffix;
|
||||
std::string rowPrefix, rowSuffix, rowSeparator, rowSpacer;
|
||||
std::string coeffSeparator;
|
||||
int precision;
|
||||
int flags;
|
||||
};
|
||||
|
||||
/** \class WithFormat
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Pseudo expression providing matrix output with given format
|
||||
*
|
||||
* \tparam ExpressionType the type of the object on which IO stream operations are performed
|
||||
*
|
||||
* This class represents an expression with stream operators controlled by a given IOFormat.
|
||||
* It is the return type of DenseBase::format()
|
||||
* and most of the time this is the only way it is used.
|
||||
*
|
||||
* See class IOFormat for some examples.
|
||||
*
|
||||
* \sa DenseBase::format(), class IOFormat
|
||||
*/
|
||||
template<typename ExpressionType>
|
||||
class WithFormat
|
||||
{
|
||||
public:
|
||||
|
||||
WithFormat(const ExpressionType& matrix, const IOFormat& format)
|
||||
: m_matrix(matrix), m_format(format)
|
||||
{}
|
||||
|
||||
friend std::ostream & operator << (std::ostream & s, const WithFormat& wf)
|
||||
{
|
||||
return internal::print_matrix(s, wf.m_matrix.eval(), wf.m_format);
|
||||
}
|
||||
|
||||
protected:
|
||||
typename ExpressionType::Nested m_matrix;
|
||||
IOFormat m_format;
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
|
||||
// NOTE: This helper is kept for backward compatibility with previous code specializing
|
||||
// this internal::significant_decimals_impl structure. In the future we should directly
|
||||
// call digits10() which has been introduced in July 2016 in 3.3.
|
||||
template<typename Scalar>
|
||||
struct significant_decimals_impl
|
||||
{
|
||||
static inline int run()
|
||||
{
|
||||
return NumTraits<Scalar>::digits10();
|
||||
}
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* print the matrix \a _m to the output stream \a s using the output format \a fmt */
|
||||
template<typename Derived>
|
||||
std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat& fmt)
|
||||
{
|
||||
if(_m.size() == 0)
|
||||
{
|
||||
s << fmt.matPrefix << fmt.matSuffix;
|
||||
return s;
|
||||
}
|
||||
|
||||
typename Derived::Nested m = _m;
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
|
||||
Index width = 0;
|
||||
|
||||
std::streamsize explicit_precision;
|
||||
if(fmt.precision == StreamPrecision)
|
||||
{
|
||||
explicit_precision = 0;
|
||||
}
|
||||
else if(fmt.precision == FullPrecision)
|
||||
{
|
||||
if (NumTraits<Scalar>::IsInteger)
|
||||
{
|
||||
explicit_precision = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
explicit_precision = significant_decimals_impl<Scalar>::run();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
explicit_precision = fmt.precision;
|
||||
}
|
||||
|
||||
std::streamsize old_precision = 0;
|
||||
if(explicit_precision) old_precision = s.precision(explicit_precision);
|
||||
|
||||
bool align_cols = !(fmt.flags & DontAlignCols);
|
||||
if(align_cols)
|
||||
{
|
||||
// compute the largest width
|
||||
for(Index j = 0; j < m.cols(); ++j)
|
||||
for(Index i = 0; i < m.rows(); ++i)
|
||||
{
|
||||
std::stringstream sstr;
|
||||
sstr.copyfmt(s);
|
||||
sstr << m.coeff(i,j);
|
||||
width = std::max<Index>(width, Index(sstr.str().length()));
|
||||
}
|
||||
}
|
||||
s << fmt.matPrefix;
|
||||
for(Index i = 0; i < m.rows(); ++i)
|
||||
{
|
||||
if (i)
|
||||
s << fmt.rowSpacer;
|
||||
s << fmt.rowPrefix;
|
||||
if(width) s.width(width);
|
||||
s << m.coeff(i, 0);
|
||||
for(Index j = 1; j < m.cols(); ++j)
|
||||
{
|
||||
s << fmt.coeffSeparator;
|
||||
if (width) s.width(width);
|
||||
s << m.coeff(i, j);
|
||||
}
|
||||
s << fmt.rowSuffix;
|
||||
if( i < m.rows() - 1)
|
||||
s << fmt.rowSeparator;
|
||||
}
|
||||
s << fmt.matSuffix;
|
||||
if(explicit_precision) s.precision(old_precision);
|
||||
return s;
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \relates DenseBase
|
||||
*
|
||||
* Outputs the matrix, to the given stream.
|
||||
*
|
||||
* If you wish to print the matrix with a format different than the default, use DenseBase::format().
|
||||
*
|
||||
* It is also possible to change the default format by defining EIGEN_DEFAULT_IO_FORMAT before including Eigen headers.
|
||||
* If not defined, this will automatically be defined to Eigen::IOFormat(), that is the Eigen::IOFormat with default parameters.
|
||||
*
|
||||
* \sa DenseBase::format()
|
||||
*/
|
||||
template<typename Derived>
|
||||
std::ostream & operator <<
|
||||
(std::ostream & s,
|
||||
const DenseBase<Derived> & m)
|
||||
{
|
||||
return internal::print_matrix(s, m.eval(), EIGEN_DEFAULT_IO_FORMAT);
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_IO_H
|
||||
118
external/include/eigen3/Eigen/src/Core/Inverse.h
vendored
Normal file
118
external/include/eigen3/Eigen/src/Core/Inverse.h
vendored
Normal file
@@ -0,0 +1,118 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_INVERSE_H
|
||||
#define EIGEN_INVERSE_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
template<typename XprType,typename StorageKind> class InverseImpl;
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename XprType>
|
||||
struct traits<Inverse<XprType> >
|
||||
: traits<typename XprType::PlainObject>
|
||||
{
|
||||
typedef typename XprType::PlainObject PlainObject;
|
||||
typedef traits<PlainObject> BaseTraits;
|
||||
enum {
|
||||
Flags = BaseTraits::Flags & RowMajorBit
|
||||
};
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \class Inverse
|
||||
*
|
||||
* \brief Expression of the inverse of another expression
|
||||
*
|
||||
* \tparam XprType the type of the expression we are taking the inverse
|
||||
*
|
||||
* This class represents an abstract expression of A.inverse()
|
||||
* and most of the time this is the only way it is used.
|
||||
*
|
||||
*/
|
||||
template<typename XprType>
|
||||
class Inverse : public InverseImpl<XprType,typename internal::traits<XprType>::StorageKind>
|
||||
{
|
||||
public:
|
||||
typedef typename XprType::StorageIndex StorageIndex;
|
||||
typedef typename XprType::PlainObject PlainObject;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename internal::ref_selector<XprType>::type XprTypeNested;
|
||||
typedef typename internal::remove_all<XprTypeNested>::type XprTypeNestedCleaned;
|
||||
typedef typename internal::ref_selector<Inverse>::type Nested;
|
||||
typedef typename internal::remove_all<XprType>::type NestedExpression;
|
||||
|
||||
explicit EIGEN_DEVICE_FUNC Inverse(const XprType &xpr)
|
||||
: m_xpr(xpr)
|
||||
{}
|
||||
|
||||
EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
|
||||
EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC const XprTypeNestedCleaned& nestedExpression() const { return m_xpr; }
|
||||
|
||||
protected:
|
||||
XprTypeNested m_xpr;
|
||||
};
|
||||
|
||||
// Generic API dispatcher
|
||||
template<typename XprType, typename StorageKind>
|
||||
class InverseImpl
|
||||
: public internal::generic_xpr_base<Inverse<XprType> >::type
|
||||
{
|
||||
public:
|
||||
typedef typename internal::generic_xpr_base<Inverse<XprType> >::type Base;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
private:
|
||||
|
||||
Scalar coeff(Index row, Index col) const;
|
||||
Scalar coeff(Index i) const;
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
|
||||
/** \internal
|
||||
* \brief Default evaluator for Inverse expression.
|
||||
*
|
||||
* This default evaluator for Inverse expression simply evaluate the inverse into a temporary
|
||||
* by a call to internal::call_assignment_no_alias.
|
||||
* Therefore, inverse implementers only have to specialize Assignment<Dst,Inverse<...>, ...> for
|
||||
* there own nested expression.
|
||||
*
|
||||
* \sa class Inverse
|
||||
*/
|
||||
template<typename ArgType>
|
||||
struct unary_evaluator<Inverse<ArgType> >
|
||||
: public evaluator<typename Inverse<ArgType>::PlainObject>
|
||||
{
|
||||
typedef Inverse<ArgType> InverseType;
|
||||
typedef typename InverseType::PlainObject PlainObject;
|
||||
typedef evaluator<PlainObject> Base;
|
||||
|
||||
enum { Flags = Base::Flags | EvalBeforeNestingBit };
|
||||
|
||||
unary_evaluator(const InverseType& inv_xpr)
|
||||
: m_result(inv_xpr.rows(), inv_xpr.cols())
|
||||
{
|
||||
::new (static_cast<Base*>(this)) Base(m_result);
|
||||
internal::call_assignment_no_alias(m_result, inv_xpr);
|
||||
}
|
||||
|
||||
protected:
|
||||
PlainObject m_result;
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_INVERSE_H
|
||||
171
external/include/eigen3/Eigen/src/Core/Map.h
vendored
Normal file
171
external/include/eigen3/Eigen/src/Core/Map.h
vendored
Normal file
@@ -0,0 +1,171 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2007-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_MAP_H
|
||||
#define EIGEN_MAP_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
template<typename PlainObjectType, int MapOptions, typename StrideType>
|
||||
struct traits<Map<PlainObjectType, MapOptions, StrideType> >
|
||||
: public traits<PlainObjectType>
|
||||
{
|
||||
typedef traits<PlainObjectType> TraitsBase;
|
||||
enum {
|
||||
PlainObjectTypeInnerSize = ((traits<PlainObjectType>::Flags&RowMajorBit)==RowMajorBit)
|
||||
? PlainObjectType::ColsAtCompileTime
|
||||
: PlainObjectType::RowsAtCompileTime,
|
||||
|
||||
InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
|
||||
? int(PlainObjectType::InnerStrideAtCompileTime)
|
||||
: int(StrideType::InnerStrideAtCompileTime),
|
||||
OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
|
||||
? (InnerStrideAtCompileTime==Dynamic || PlainObjectTypeInnerSize==Dynamic
|
||||
? Dynamic
|
||||
: int(InnerStrideAtCompileTime) * int(PlainObjectTypeInnerSize))
|
||||
: int(StrideType::OuterStrideAtCompileTime),
|
||||
Alignment = int(MapOptions)&int(AlignedMask),
|
||||
Flags0 = TraitsBase::Flags & (~NestByRefBit),
|
||||
Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit)
|
||||
};
|
||||
private:
|
||||
enum { Options }; // Expressions don't have Options
|
||||
};
|
||||
}
|
||||
|
||||
/** \class Map
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief A matrix or vector expression mapping an existing array of data.
|
||||
*
|
||||
* \tparam PlainObjectType the equivalent matrix type of the mapped data
|
||||
* \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned.
|
||||
* The default is \c #Unaligned.
|
||||
* \tparam StrideType optionally specifies strides. By default, Map assumes the memory layout
|
||||
* of an ordinary, contiguous array. This can be overridden by specifying strides.
|
||||
* The type passed here must be a specialization of the Stride template, see examples below.
|
||||
*
|
||||
* This class represents a matrix or vector expression mapping an existing array of data.
|
||||
* It can be used to let Eigen interface without any overhead with non-Eigen data structures,
|
||||
* such as plain C arrays or structures from other libraries. By default, it assumes that the
|
||||
* data is laid out contiguously in memory. You can however override this by explicitly specifying
|
||||
* inner and outer strides.
|
||||
*
|
||||
* Here's an example of simply mapping a contiguous array as a \ref TopicStorageOrders "column-major" matrix:
|
||||
* \include Map_simple.cpp
|
||||
* Output: \verbinclude Map_simple.out
|
||||
*
|
||||
* If you need to map non-contiguous arrays, you can do so by specifying strides:
|
||||
*
|
||||
* Here's an example of mapping an array as a vector, specifying an inner stride, that is, the pointer
|
||||
* increment between two consecutive coefficients. Here, we're specifying the inner stride as a compile-time
|
||||
* fixed value.
|
||||
* \include Map_inner_stride.cpp
|
||||
* Output: \verbinclude Map_inner_stride.out
|
||||
*
|
||||
* Here's an example of mapping an array while specifying an outer stride. Here, since we're mapping
|
||||
* as a column-major matrix, 'outer stride' means the pointer increment between two consecutive columns.
|
||||
* Here, we're specifying the outer stride as a runtime parameter. Note that here \c OuterStride<> is
|
||||
* a short version of \c OuterStride<Dynamic> because the default template parameter of OuterStride
|
||||
* is \c Dynamic
|
||||
* \include Map_outer_stride.cpp
|
||||
* Output: \verbinclude Map_outer_stride.out
|
||||
*
|
||||
* For more details and for an example of specifying both an inner and an outer stride, see class Stride.
|
||||
*
|
||||
* \b Tip: to change the array of data mapped by a Map object, you can use the C++
|
||||
* placement new syntax:
|
||||
*
|
||||
* Example: \include Map_placement_new.cpp
|
||||
* Output: \verbinclude Map_placement_new.out
|
||||
*
|
||||
* This class is the return type of PlainObjectBase::Map() but can also be used directly.
|
||||
*
|
||||
* \sa PlainObjectBase::Map(), \ref TopicStorageOrders
|
||||
*/
|
||||
template<typename PlainObjectType, int MapOptions, typename StrideType> class Map
|
||||
: public MapBase<Map<PlainObjectType, MapOptions, StrideType> >
|
||||
{
|
||||
public:
|
||||
|
||||
typedef MapBase<Map> Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Map)
|
||||
|
||||
typedef typename Base::PointerType PointerType;
|
||||
typedef PointerType PointerArgType;
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline PointerType cast_to_pointer_type(PointerArgType ptr) { return ptr; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const
|
||||
{
|
||||
return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const
|
||||
{
|
||||
return int(StrideType::OuterStrideAtCompileTime) != 0 ? m_stride.outer()
|
||||
: int(internal::traits<Map>::OuterStrideAtCompileTime) != Dynamic ? Index(internal::traits<Map>::OuterStrideAtCompileTime)
|
||||
: IsVectorAtCompileTime ? (this->size() * innerStride())
|
||||
: (int(Flags)&RowMajorBit) ? (this->cols() * innerStride())
|
||||
: (this->rows() * innerStride());
|
||||
}
|
||||
|
||||
/** Constructor in the fixed-size case.
|
||||
*
|
||||
* \param dataPtr pointer to the array to map
|
||||
* \param stride optional Stride object, passing the strides.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit inline Map(PointerArgType dataPtr, const StrideType& stride = StrideType())
|
||||
: Base(cast_to_pointer_type(dataPtr)), m_stride(stride)
|
||||
{
|
||||
PlainObjectType::Base::_check_template_params();
|
||||
}
|
||||
|
||||
/** Constructor in the dynamic-size vector case.
|
||||
*
|
||||
* \param dataPtr pointer to the array to map
|
||||
* \param size the size of the vector expression
|
||||
* \param stride optional Stride object, passing the strides.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Map(PointerArgType dataPtr, Index size, const StrideType& stride = StrideType())
|
||||
: Base(cast_to_pointer_type(dataPtr), size), m_stride(stride)
|
||||
{
|
||||
PlainObjectType::Base::_check_template_params();
|
||||
}
|
||||
|
||||
/** Constructor in the dynamic-size matrix case.
|
||||
*
|
||||
* \param dataPtr pointer to the array to map
|
||||
* \param rows the number of rows of the matrix expression
|
||||
* \param cols the number of columns of the matrix expression
|
||||
* \param stride optional Stride object, passing the strides.
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Map(PointerArgType dataPtr, Index rows, Index cols, const StrideType& stride = StrideType())
|
||||
: Base(cast_to_pointer_type(dataPtr), rows, cols), m_stride(stride)
|
||||
{
|
||||
PlainObjectType::Base::_check_template_params();
|
||||
}
|
||||
|
||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map)
|
||||
|
||||
protected:
|
||||
StrideType m_stride;
|
||||
};
|
||||
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_MAP_H
|
||||
303
external/include/eigen3/Eigen/src/Core/MapBase.h
vendored
Normal file
303
external/include/eigen3/Eigen/src/Core/MapBase.h
vendored
Normal file
@@ -0,0 +1,303 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2007-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_MAPBASE_H
|
||||
#define EIGEN_MAPBASE_H
|
||||
|
||||
#define EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) \
|
||||
EIGEN_STATIC_ASSERT((int(internal::evaluator<Derived>::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \
|
||||
YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT)
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \ingroup Core_Module
|
||||
*
|
||||
* \brief Base class for dense Map and Block expression with direct access
|
||||
*
|
||||
* This base class provides the const low-level accessors (e.g. coeff, coeffRef) of dense
|
||||
* Map and Block objects with direct access.
|
||||
* Typical users do not have to directly deal with this class.
|
||||
*
|
||||
* This class can be extended by through the macro plugin \c EIGEN_MAPBASE_PLUGIN.
|
||||
* See \link TopicCustomizing_Plugins customizing Eigen \endlink for details.
|
||||
*
|
||||
* The \c Derived class has to provide the following two methods describing the memory layout:
|
||||
* \code Index innerStride() const; \endcode
|
||||
* \code Index outerStride() const; \endcode
|
||||
*
|
||||
* \sa class Map, class Block
|
||||
*/
|
||||
template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
|
||||
: public internal::dense_xpr_base<Derived>::type
|
||||
{
|
||||
public:
|
||||
|
||||
typedef typename internal::dense_xpr_base<Derived>::type Base;
|
||||
enum {
|
||||
RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
|
||||
ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
|
||||
InnerStrideAtCompileTime = internal::traits<Derived>::InnerStrideAtCompileTime,
|
||||
SizeAtCompileTime = Base::SizeAtCompileTime
|
||||
};
|
||||
|
||||
typedef typename internal::traits<Derived>::StorageKind StorageKind;
|
||||
typedef typename internal::traits<Derived>::Scalar Scalar;
|
||||
typedef typename internal::packet_traits<Scalar>::type PacketScalar;
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
typedef typename internal::conditional<
|
||||
bool(internal::is_lvalue<Derived>::value),
|
||||
Scalar *,
|
||||
const Scalar *>::type
|
||||
PointerType;
|
||||
|
||||
using Base::derived;
|
||||
// using Base::RowsAtCompileTime;
|
||||
// using Base::ColsAtCompileTime;
|
||||
// using Base::SizeAtCompileTime;
|
||||
using Base::MaxRowsAtCompileTime;
|
||||
using Base::MaxColsAtCompileTime;
|
||||
using Base::MaxSizeAtCompileTime;
|
||||
using Base::IsVectorAtCompileTime;
|
||||
using Base::Flags;
|
||||
using Base::IsRowMajor;
|
||||
|
||||
using Base::rows;
|
||||
using Base::cols;
|
||||
using Base::size;
|
||||
using Base::coeff;
|
||||
using Base::coeffRef;
|
||||
using Base::lazyAssign;
|
||||
using Base::eval;
|
||||
|
||||
using Base::innerStride;
|
||||
using Base::outerStride;
|
||||
using Base::rowStride;
|
||||
using Base::colStride;
|
||||
|
||||
// bug 217 - compile error on ICC 11.1
|
||||
using Base::operator=;
|
||||
|
||||
typedef typename Base::CoeffReturnType CoeffReturnType;
|
||||
|
||||
/** \copydoc DenseBase::rows() */
|
||||
EIGEN_DEVICE_FUNC inline Index rows() const { return m_rows.value(); }
|
||||
/** \copydoc DenseBase::cols() */
|
||||
EIGEN_DEVICE_FUNC inline Index cols() const { return m_cols.value(); }
|
||||
|
||||
/** Returns a pointer to the first coefficient of the matrix or vector.
|
||||
*
|
||||
* \note When addressing this data, make sure to honor the strides returned by innerStride() and outerStride().
|
||||
*
|
||||
* \sa innerStride(), outerStride()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_data; }
|
||||
|
||||
/** \copydoc PlainObjectBase::coeff(Index,Index) const */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeff(Index rowId, Index colId) const
|
||||
{
|
||||
return m_data[colId * colStride() + rowId * rowStride()];
|
||||
}
|
||||
|
||||
/** \copydoc PlainObjectBase::coeff(Index) const */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeff(Index index) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
|
||||
return m_data[index * innerStride()];
|
||||
}
|
||||
|
||||
/** \copydoc PlainObjectBase::coeffRef(Index,Index) const */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index rowId, Index colId) const
|
||||
{
|
||||
return this->m_data[colId * colStride() + rowId * rowStride()];
|
||||
}
|
||||
|
||||
/** \copydoc PlainObjectBase::coeffRef(Index) const */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index index) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
|
||||
return this->m_data[index * innerStride()];
|
||||
}
|
||||
|
||||
/** \internal */
|
||||
template<int LoadMode>
|
||||
inline PacketScalar packet(Index rowId, Index colId) const
|
||||
{
|
||||
return internal::ploadt<PacketScalar, LoadMode>
|
||||
(m_data + (colId * colStride() + rowId * rowStride()));
|
||||
}
|
||||
|
||||
/** \internal */
|
||||
template<int LoadMode>
|
||||
inline PacketScalar packet(Index index) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
|
||||
return internal::ploadt<PacketScalar, LoadMode>(m_data + index * innerStride());
|
||||
}
|
||||
|
||||
/** \internal Constructor for fixed size matrices or vectors */
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
|
||||
checkSanity<Derived>();
|
||||
}
|
||||
|
||||
/** \internal Constructor for dynamically sized vectors */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline MapBase(PointerType dataPtr, Index vecSize)
|
||||
: m_data(dataPtr),
|
||||
m_rows(RowsAtCompileTime == Dynamic ? vecSize : Index(RowsAtCompileTime)),
|
||||
m_cols(ColsAtCompileTime == Dynamic ? vecSize : Index(ColsAtCompileTime))
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
eigen_assert(vecSize >= 0);
|
||||
eigen_assert(dataPtr == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == vecSize);
|
||||
checkSanity<Derived>();
|
||||
}
|
||||
|
||||
/** \internal Constructor for dynamically sized matrices */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline MapBase(PointerType dataPtr, Index rows, Index cols)
|
||||
: m_data(dataPtr), m_rows(rows), m_cols(cols)
|
||||
{
|
||||
eigen_assert( (dataPtr == 0)
|
||||
|| ( rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
|
||||
&& cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)));
|
||||
checkSanity<Derived>();
|
||||
}
|
||||
|
||||
#ifdef EIGEN_MAPBASE_PLUGIN
|
||||
#include EIGEN_MAPBASE_PLUGIN
|
||||
#endif
|
||||
|
||||
protected:
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void checkSanity(typename internal::enable_if<(internal::traits<T>::Alignment>0),void*>::type = 0) const
|
||||
{
|
||||
#if EIGEN_MAX_ALIGN_BYTES>0
|
||||
// innerStride() is not set yet when this function is called, so we optimistically assume the lowest plausible value:
|
||||
const Index minInnerStride = InnerStrideAtCompileTime == Dynamic ? 1 : Index(InnerStrideAtCompileTime);
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(minInnerStride);
|
||||
eigen_assert(( ((internal::UIntPtr(m_data) % internal::traits<Derived>::Alignment) == 0)
|
||||
|| (cols() * rows() * minInnerStride * sizeof(Scalar)) < internal::traits<Derived>::Alignment ) && "data is not aligned");
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void checkSanity(typename internal::enable_if<internal::traits<T>::Alignment==0,void*>::type = 0) const
|
||||
{}
|
||||
|
||||
PointerType m_data;
|
||||
const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
|
||||
const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
|
||||
};
|
||||
|
||||
/** \ingroup Core_Module
|
||||
*
|
||||
* \brief Base class for non-const dense Map and Block expression with direct access
|
||||
*
|
||||
* This base class provides the non-const low-level accessors (e.g. coeff and coeffRef) of
|
||||
* dense Map and Block objects with direct access.
|
||||
* It inherits MapBase<Derived, ReadOnlyAccessors> which defines the const variant for reading specific entries.
|
||||
*
|
||||
* \sa class Map, class Block
|
||||
*/
|
||||
template<typename Derived> class MapBase<Derived, WriteAccessors>
|
||||
: public MapBase<Derived, ReadOnlyAccessors>
|
||||
{
|
||||
typedef MapBase<Derived, ReadOnlyAccessors> ReadOnlyMapBase;
|
||||
public:
|
||||
|
||||
typedef MapBase<Derived, ReadOnlyAccessors> Base;
|
||||
|
||||
typedef typename Base::Scalar Scalar;
|
||||
typedef typename Base::PacketScalar PacketScalar;
|
||||
typedef typename Base::StorageIndex StorageIndex;
|
||||
typedef typename Base::PointerType PointerType;
|
||||
|
||||
using Base::derived;
|
||||
using Base::rows;
|
||||
using Base::cols;
|
||||
using Base::size;
|
||||
using Base::coeff;
|
||||
using Base::coeffRef;
|
||||
|
||||
using Base::innerStride;
|
||||
using Base::outerStride;
|
||||
using Base::rowStride;
|
||||
using Base::colStride;
|
||||
|
||||
typedef typename internal::conditional<
|
||||
internal::is_lvalue<Derived>::value,
|
||||
Scalar,
|
||||
const Scalar
|
||||
>::type ScalarWithConstIfNotLvalue;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar* data() const { return this->m_data; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ScalarWithConstIfNotLvalue* data() { return this->m_data; } // no const-cast here so non-const-correct code will give a compile error
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ScalarWithConstIfNotLvalue& coeffRef(Index row, Index col)
|
||||
{
|
||||
return this->m_data[col * colStride() + row * rowStride()];
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ScalarWithConstIfNotLvalue& coeffRef(Index index)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
|
||||
return this->m_data[index * innerStride()];
|
||||
}
|
||||
|
||||
template<int StoreMode>
|
||||
inline void writePacket(Index row, Index col, const PacketScalar& val)
|
||||
{
|
||||
internal::pstoret<Scalar, PacketScalar, StoreMode>
|
||||
(this->m_data + (col * colStride() + row * rowStride()), val);
|
||||
}
|
||||
|
||||
template<int StoreMode>
|
||||
inline void writePacket(Index index, const PacketScalar& val)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
|
||||
internal::pstoret<Scalar, PacketScalar, StoreMode>
|
||||
(this->m_data + index * innerStride(), val);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {}
|
||||
EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {}
|
||||
EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index rows, Index cols) : Base(dataPtr, rows, cols) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator=(const MapBase& other)
|
||||
{
|
||||
ReadOnlyMapBase::Base::operator=(other);
|
||||
return derived();
|
||||
}
|
||||
|
||||
// In theory we could simply refer to Base:Base::operator=, but MSVC does not like Base::Base,
|
||||
// see bugs 821 and 920.
|
||||
using ReadOnlyMapBase::Base::operator=;
|
||||
};
|
||||
|
||||
#undef EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_MAPBASE_H
|
||||
1415
external/include/eigen3/Eigen/src/Core/MathFunctions.h
vendored
Normal file
1415
external/include/eigen3/Eigen/src/Core/MathFunctions.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
101
external/include/eigen3/Eigen/src/Core/MathFunctionsImpl.h
vendored
Normal file
101
external/include/eigen3/Eigen/src/Core/MathFunctionsImpl.h
vendored
Normal file
@@ -0,0 +1,101 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com)
|
||||
// Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_MATHFUNCTIONSIMPL_H
|
||||
#define EIGEN_MATHFUNCTIONSIMPL_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
/** \internal \returns the hyperbolic tan of \a a (coeff-wise)
|
||||
Doesn't do anything fancy, just a 13/6-degree rational interpolant which
|
||||
is accurate up to a couple of ulp in the range [-9, 9], outside of which
|
||||
the tanh(x) = +/-1.
|
||||
|
||||
This implementation works on both scalars and packets.
|
||||
*/
|
||||
template<typename T>
|
||||
T generic_fast_tanh_float(const T& a_x)
|
||||
{
|
||||
// Clamp the inputs to the range [-9, 9] since anything outside
|
||||
// this range is +/-1.0f in single-precision.
|
||||
const T plus_9 = pset1<T>(9.f);
|
||||
const T minus_9 = pset1<T>(-9.f);
|
||||
// NOTE GCC prior to 6.3 might improperly optimize this max/min
|
||||
// step such that if a_x is nan, x will be either 9 or -9,
|
||||
// and tanh will return 1 or -1 instead of nan.
|
||||
// This is supposed to be fixed in gcc6.3,
|
||||
// see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867
|
||||
const T x = pmax(minus_9,pmin(plus_9,a_x));
|
||||
// The monomial coefficients of the numerator polynomial (odd).
|
||||
const T alpha_1 = pset1<T>(4.89352455891786e-03f);
|
||||
const T alpha_3 = pset1<T>(6.37261928875436e-04f);
|
||||
const T alpha_5 = pset1<T>(1.48572235717979e-05f);
|
||||
const T alpha_7 = pset1<T>(5.12229709037114e-08f);
|
||||
const T alpha_9 = pset1<T>(-8.60467152213735e-11f);
|
||||
const T alpha_11 = pset1<T>(2.00018790482477e-13f);
|
||||
const T alpha_13 = pset1<T>(-2.76076847742355e-16f);
|
||||
|
||||
// The monomial coefficients of the denominator polynomial (even).
|
||||
const T beta_0 = pset1<T>(4.89352518554385e-03f);
|
||||
const T beta_2 = pset1<T>(2.26843463243900e-03f);
|
||||
const T beta_4 = pset1<T>(1.18534705686654e-04f);
|
||||
const T beta_6 = pset1<T>(1.19825839466702e-06f);
|
||||
|
||||
// Since the polynomials are odd/even, we need x^2.
|
||||
const T x2 = pmul(x, x);
|
||||
|
||||
// Evaluate the numerator polynomial p.
|
||||
T p = pmadd(x2, alpha_13, alpha_11);
|
||||
p = pmadd(x2, p, alpha_9);
|
||||
p = pmadd(x2, p, alpha_7);
|
||||
p = pmadd(x2, p, alpha_5);
|
||||
p = pmadd(x2, p, alpha_3);
|
||||
p = pmadd(x2, p, alpha_1);
|
||||
p = pmul(x, p);
|
||||
|
||||
// Evaluate the denominator polynomial p.
|
||||
T q = pmadd(x2, beta_6, beta_4);
|
||||
q = pmadd(x2, q, beta_2);
|
||||
q = pmadd(x2, q, beta_0);
|
||||
|
||||
// Divide the numerator by the denominator.
|
||||
return pdiv(p, q);
|
||||
}
|
||||
|
||||
template<typename RealScalar>
|
||||
EIGEN_STRONG_INLINE
|
||||
RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y)
|
||||
{
|
||||
EIGEN_USING_STD_MATH(sqrt);
|
||||
RealScalar p, qp;
|
||||
p = numext::maxi(x,y);
|
||||
if(p==RealScalar(0)) return RealScalar(0);
|
||||
qp = numext::mini(y,x) / p;
|
||||
return p * sqrt(RealScalar(1) + qp*qp);
|
||||
}
|
||||
|
||||
template<typename Scalar>
|
||||
struct hypot_impl
|
||||
{
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
static inline RealScalar run(const Scalar& x, const Scalar& y)
|
||||
{
|
||||
EIGEN_USING_STD_MATH(abs);
|
||||
return positive_real_hypot<RealScalar>(abs(x), abs(y));
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_MATHFUNCTIONSIMPL_H
|
||||
459
external/include/eigen3/Eigen/src/Core/Matrix.h
vendored
Normal file
459
external/include/eigen3/Eigen/src/Core/Matrix.h
vendored
Normal file
@@ -0,0 +1,459 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_MATRIX_H
|
||||
#define EIGEN_MATRIX_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
|
||||
{
|
||||
private:
|
||||
enum { size = internal::size_at_compile_time<_Rows,_Cols>::ret };
|
||||
typedef typename find_best_packet<_Scalar,size>::type PacketScalar;
|
||||
enum {
|
||||
row_major_bit = _Options&RowMajor ? RowMajorBit : 0,
|
||||
is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic,
|
||||
max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols,
|
||||
default_alignment = compute_default_alignment<_Scalar,max_size>::value,
|
||||
actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0,
|
||||
required_alignment = unpacket_traits<PacketScalar>::alignment,
|
||||
packet_access_bit = (packet_traits<_Scalar>::Vectorizable && (EIGEN_UNALIGNED_VECTORIZE || (actual_alignment>=required_alignment))) ? PacketAccessBit : 0
|
||||
};
|
||||
|
||||
public:
|
||||
typedef _Scalar Scalar;
|
||||
typedef Dense StorageKind;
|
||||
typedef Eigen::Index StorageIndex;
|
||||
typedef MatrixXpr XprKind;
|
||||
enum {
|
||||
RowsAtCompileTime = _Rows,
|
||||
ColsAtCompileTime = _Cols,
|
||||
MaxRowsAtCompileTime = _MaxRows,
|
||||
MaxColsAtCompileTime = _MaxCols,
|
||||
Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
|
||||
Options = _Options,
|
||||
InnerStrideAtCompileTime = 1,
|
||||
OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime,
|
||||
|
||||
// FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase
|
||||
EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit,
|
||||
Alignment = actual_alignment
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
/** \class Matrix
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief The matrix class, also used for vectors and row-vectors
|
||||
*
|
||||
* The %Matrix class is the work-horse for all \em dense (\ref dense "note") matrices and vectors within Eigen.
|
||||
* Vectors are matrices with one column, and row-vectors are matrices with one row.
|
||||
*
|
||||
* The %Matrix class encompasses \em both fixed-size and dynamic-size objects (\ref fixedsize "note").
|
||||
*
|
||||
* The first three template parameters are required:
|
||||
* \tparam _Scalar Numeric type, e.g. float, double, int or std::complex<float>.
|
||||
* User defined scalar types are supported as well (see \ref user_defined_scalars "here").
|
||||
* \tparam _Rows Number of rows, or \b Dynamic
|
||||
* \tparam _Cols Number of columns, or \b Dynamic
|
||||
*
|
||||
* The remaining template parameters are optional -- in most cases you don't have to worry about them.
|
||||
* \tparam _Options A combination of either \b #RowMajor or \b #ColMajor, and of either
|
||||
* \b #AutoAlign or \b #DontAlign.
|
||||
* The former controls \ref TopicStorageOrders "storage order", and defaults to column-major. The latter controls alignment, which is required
|
||||
* for vectorization. It defaults to aligning matrices except for fixed sizes that aren't a multiple of the packet size.
|
||||
* \tparam _MaxRows Maximum number of rows. Defaults to \a _Rows (\ref maxrows "note").
|
||||
* \tparam _MaxCols Maximum number of columns. Defaults to \a _Cols (\ref maxrows "note").
|
||||
*
|
||||
* Eigen provides a number of typedefs covering the usual cases. Here are some examples:
|
||||
*
|
||||
* \li \c Matrix2d is a 2x2 square matrix of doubles (\c Matrix<double, 2, 2>)
|
||||
* \li \c Vector4f is a vector of 4 floats (\c Matrix<float, 4, 1>)
|
||||
* \li \c RowVector3i is a row-vector of 3 ints (\c Matrix<int, 1, 3>)
|
||||
*
|
||||
* \li \c MatrixXf is a dynamic-size matrix of floats (\c Matrix<float, Dynamic, Dynamic>)
|
||||
* \li \c VectorXf is a dynamic-size vector of floats (\c Matrix<float, Dynamic, 1>)
|
||||
*
|
||||
* \li \c Matrix2Xf is a partially fixed-size (dynamic-size) matrix of floats (\c Matrix<float, 2, Dynamic>)
|
||||
* \li \c MatrixX3d is a partially dynamic-size (fixed-size) matrix of double (\c Matrix<double, Dynamic, 3>)
|
||||
*
|
||||
* See \link matrixtypedefs this page \endlink for a complete list of predefined \em %Matrix and \em Vector typedefs.
|
||||
*
|
||||
* You can access elements of vectors and matrices using normal subscripting:
|
||||
*
|
||||
* \code
|
||||
* Eigen::VectorXd v(10);
|
||||
* v[0] = 0.1;
|
||||
* v[1] = 0.2;
|
||||
* v(0) = 0.3;
|
||||
* v(1) = 0.4;
|
||||
*
|
||||
* Eigen::MatrixXi m(10, 10);
|
||||
* m(0, 1) = 1;
|
||||
* m(0, 2) = 2;
|
||||
* m(0, 3) = 3;
|
||||
* \endcode
|
||||
*
|
||||
* This class can be extended with the help of the plugin mechanism described on the page
|
||||
* \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_MATRIX_PLUGIN.
|
||||
*
|
||||
* <i><b>Some notes:</b></i>
|
||||
*
|
||||
* <dl>
|
||||
* <dt><b>\anchor dense Dense versus sparse:</b></dt>
|
||||
* <dd>This %Matrix class handles dense, not sparse matrices and vectors. For sparse matrices and vectors, see the Sparse module.
|
||||
*
|
||||
* Dense matrices and vectors are plain usual arrays of coefficients. All the coefficients are stored, in an ordinary contiguous array.
|
||||
* This is unlike Sparse matrices and vectors where the coefficients are stored as a list of nonzero coefficients.</dd>
|
||||
*
|
||||
* <dt><b>\anchor fixedsize Fixed-size versus dynamic-size:</b></dt>
|
||||
* <dd>Fixed-size means that the numbers of rows and columns are known are compile-time. In this case, Eigen allocates the array
|
||||
* of coefficients as a fixed-size array, as a class member. This makes sense for very small matrices, typically up to 4x4, sometimes up
|
||||
* to 16x16. Larger matrices should be declared as dynamic-size even if one happens to know their size at compile-time.
|
||||
*
|
||||
* Dynamic-size means that the numbers of rows or columns are not necessarily known at compile-time. In this case they are runtime
|
||||
* variables, and the array of coefficients is allocated dynamically on the heap.
|
||||
*
|
||||
* Note that \em dense matrices, be they Fixed-size or Dynamic-size, <em>do not</em> expand dynamically in the sense of a std::map.
|
||||
* If you want this behavior, see the Sparse module.</dd>
|
||||
*
|
||||
* <dt><b>\anchor maxrows _MaxRows and _MaxCols:</b></dt>
|
||||
* <dd>In most cases, one just leaves these parameters to the default values.
|
||||
* These parameters mean the maximum size of rows and columns that the matrix may have. They are useful in cases
|
||||
* when the exact numbers of rows and columns are not known are compile-time, but it is known at compile-time that they cannot
|
||||
* exceed a certain value. This happens when taking dynamic-size blocks inside fixed-size matrices: in this case _MaxRows and _MaxCols
|
||||
* are the dimensions of the original matrix, while _Rows and _Cols are Dynamic.</dd>
|
||||
* </dl>
|
||||
*
|
||||
* <i><b>ABI and storage layout</b></i>
|
||||
*
|
||||
* The table below summarizes the ABI of some possible Matrix instances which is fixed thorough the lifetime of Eigen 3.
|
||||
* <table class="manual">
|
||||
* <tr><th>Matrix type</th><th>Equivalent C structure</th></tr>
|
||||
* <tr><td>\code Matrix<T,Dynamic,Dynamic> \endcode</td><td>\code
|
||||
* struct {
|
||||
* T *data; // with (size_t(data)%EIGEN_MAX_ALIGN_BYTES)==0
|
||||
* Eigen::Index rows, cols;
|
||||
* };
|
||||
* \endcode</td></tr>
|
||||
* <tr class="alt"><td>\code
|
||||
* Matrix<T,Dynamic,1>
|
||||
* Matrix<T,1,Dynamic> \endcode</td><td>\code
|
||||
* struct {
|
||||
* T *data; // with (size_t(data)%EIGEN_MAX_ALIGN_BYTES)==0
|
||||
* Eigen::Index size;
|
||||
* };
|
||||
* \endcode</td></tr>
|
||||
* <tr><td>\code Matrix<T,Rows,Cols> \endcode</td><td>\code
|
||||
* struct {
|
||||
* T data[Rows*Cols]; // with (size_t(data)%A(Rows*Cols*sizeof(T)))==0
|
||||
* };
|
||||
* \endcode</td></tr>
|
||||
* <tr class="alt"><td>\code Matrix<T,Dynamic,Dynamic,0,MaxRows,MaxCols> \endcode</td><td>\code
|
||||
* struct {
|
||||
* T data[MaxRows*MaxCols]; // with (size_t(data)%A(MaxRows*MaxCols*sizeof(T)))==0
|
||||
* Eigen::Index rows, cols;
|
||||
* };
|
||||
* \endcode</td></tr>
|
||||
* </table>
|
||||
* Note that in this table Rows, Cols, MaxRows and MaxCols are all positive integers. A(S) is defined to the largest possible power-of-two
|
||||
* smaller to EIGEN_MAX_STATIC_ALIGN_BYTES.
|
||||
*
|
||||
* \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy,
|
||||
* \ref TopicStorageOrders
|
||||
*/
|
||||
|
||||
template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
|
||||
class Matrix
|
||||
: public PlainObjectBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
|
||||
{
|
||||
public:
|
||||
|
||||
/** \brief Base class typedef.
|
||||
* \sa PlainObjectBase
|
||||
*/
|
||||
typedef PlainObjectBase<Matrix> Base;
|
||||
|
||||
enum { Options = _Options };
|
||||
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Matrix)
|
||||
|
||||
typedef typename Base::PlainObject PlainObject;
|
||||
|
||||
using Base::base;
|
||||
using Base::coeffRef;
|
||||
|
||||
/**
|
||||
* \brief Assigns matrices to each other.
|
||||
*
|
||||
* \note This is a special case of the templated operator=. Its purpose is
|
||||
* to prevent a default operator= from hiding the templated operator=.
|
||||
*
|
||||
* \callgraph
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix& operator=(const Matrix& other)
|
||||
{
|
||||
return Base::_set(other);
|
||||
}
|
||||
|
||||
/** \internal
|
||||
* \brief Copies the value of the expression \a other into \c *this with automatic resizing.
|
||||
*
|
||||
* *this might be resized to match the dimensions of \a other. If *this was a null matrix (not already initialized),
|
||||
* it will be initialized.
|
||||
*
|
||||
* Note that copying a row-vector into a vector (and conversely) is allowed.
|
||||
* The resizing, if any, is then done in the appropriate way so that row-vectors
|
||||
* remain row-vectors and vectors remain vectors.
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix& operator=(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
return Base::_set(other);
|
||||
}
|
||||
|
||||
/* Here, doxygen failed to copy the brief information when using \copydoc */
|
||||
|
||||
/**
|
||||
* \brief Copies the generic expression \a other into *this.
|
||||
* \copydetails DenseBase::operator=(const EigenBase<OtherDerived> &other)
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix& operator=(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
return Base::operator=(other);
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix& operator=(const ReturnByValue<OtherDerived>& func)
|
||||
{
|
||||
return Base::operator=(func);
|
||||
}
|
||||
|
||||
/** \brief Default constructor.
|
||||
*
|
||||
* For fixed-size matrices, does nothing.
|
||||
*
|
||||
* For dynamic-size matrices, creates an empty matrix of size 0. Does not allocate any array. Such a matrix
|
||||
* is called a null matrix. This constructor is the unique way to create null matrices: resizing
|
||||
* a matrix to 0 is not supported.
|
||||
*
|
||||
* \sa resize(Index,Index)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix() : Base()
|
||||
{
|
||||
Base::_check_template_params();
|
||||
EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
|
||||
}
|
||||
|
||||
// FIXME is it still needed
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit Matrix(internal::constructor_without_unaligned_array_assert)
|
||||
: Base(internal::constructor_without_unaligned_array_assert())
|
||||
{ Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }
|
||||
|
||||
#if EIGEN_HAS_RVALUE_REFERENCES
|
||||
EIGEN_DEVICE_FUNC
|
||||
Matrix(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)
|
||||
: Base(std::move(other))
|
||||
{
|
||||
Base::_check_template_params();
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
Matrix& operator=(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)
|
||||
{
|
||||
other.swap(*this);
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
// This constructor is for both 1x1 matrices and dynamic vectors
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE explicit Matrix(const T& x)
|
||||
{
|
||||
Base::_check_template_params();
|
||||
Base::template _init1<T>(x);
|
||||
}
|
||||
|
||||
template<typename T0, typename T1>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix(const T0& x, const T1& y)
|
||||
{
|
||||
Base::_check_template_params();
|
||||
Base::template _init2<T0,T1>(x, y);
|
||||
}
|
||||
#else
|
||||
/** \brief Constructs a fixed-sized matrix initialized with coefficients starting at \a data */
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit Matrix(const Scalar *data);
|
||||
|
||||
/** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors
|
||||
*
|
||||
* This is useful for dynamic-size vectors. For fixed-size vectors,
|
||||
* it is redundant to pass these parameters, so one should use the default constructor
|
||||
* Matrix() instead.
|
||||
*
|
||||
* \warning This constructor is disabled for fixed-size \c 1x1 matrices. For instance,
|
||||
* calling Matrix<double,1,1>(1) will call the initialization constructor: Matrix(const Scalar&).
|
||||
* For fixed-size \c 1x1 matrices it is therefore recommended to use the default
|
||||
* constructor Matrix() instead, especially when using one of the non standard
|
||||
* \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives).
|
||||
*/
|
||||
EIGEN_STRONG_INLINE explicit Matrix(Index dim);
|
||||
/** \brief Constructs an initialized 1x1 matrix with the given coefficient */
|
||||
Matrix(const Scalar& x);
|
||||
/** \brief Constructs an uninitialized matrix with \a rows rows and \a cols columns.
|
||||
*
|
||||
* This is useful for dynamic-size matrices. For fixed-size matrices,
|
||||
* it is redundant to pass these parameters, so one should use the default constructor
|
||||
* Matrix() instead.
|
||||
*
|
||||
* \warning This constructor is disabled for fixed-size \c 1x2 and \c 2x1 vectors. For instance,
|
||||
* calling Matrix2f(2,1) will call the initialization constructor: Matrix(const Scalar& x, const Scalar& y).
|
||||
* For fixed-size \c 1x2 or \c 2x1 vectors it is therefore recommended to use the default
|
||||
* constructor Matrix() instead, especially when using one of the non standard
|
||||
* \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives).
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
Matrix(Index rows, Index cols);
|
||||
|
||||
/** \brief Constructs an initialized 2D vector with given coefficients */
|
||||
Matrix(const Scalar& x, const Scalar& y);
|
||||
#endif
|
||||
|
||||
/** \brief Constructs an initialized 3D vector with given coefficients */
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z)
|
||||
{
|
||||
Base::_check_template_params();
|
||||
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 3)
|
||||
m_storage.data()[0] = x;
|
||||
m_storage.data()[1] = y;
|
||||
m_storage.data()[2] = z;
|
||||
}
|
||||
/** \brief Constructs an initialized 4D vector with given coefficients */
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z, const Scalar& w)
|
||||
{
|
||||
Base::_check_template_params();
|
||||
EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 4)
|
||||
m_storage.data()[0] = x;
|
||||
m_storage.data()[1] = y;
|
||||
m_storage.data()[2] = z;
|
||||
m_storage.data()[3] = w;
|
||||
}
|
||||
|
||||
|
||||
/** \brief Copy constructor */
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix(const Matrix& other) : Base(other)
|
||||
{ }
|
||||
|
||||
/** \brief Copy constructor for generic expressions.
|
||||
* \sa MatrixBase::operator=(const EigenBase<OtherDerived>&)
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Matrix(const EigenBase<OtherDerived> &other)
|
||||
: Base(other.derived())
|
||||
{ }
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
|
||||
EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
|
||||
|
||||
/////////// Geometry module ///////////
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit Matrix(const RotationBase<OtherDerived,ColsAtCompileTime>& r);
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Matrix& operator=(const RotationBase<OtherDerived,ColsAtCompileTime>& r);
|
||||
|
||||
// allow to extend Matrix outside Eigen
|
||||
#ifdef EIGEN_MATRIX_PLUGIN
|
||||
#include EIGEN_MATRIX_PLUGIN
|
||||
#endif
|
||||
|
||||
protected:
|
||||
template <typename Derived, typename OtherDerived, bool IsVector>
|
||||
friend struct internal::conservative_resize_like_impl;
|
||||
|
||||
using Base::m_storage;
|
||||
};
|
||||
|
||||
/** \defgroup matrixtypedefs Global matrix typedefs
|
||||
*
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* Eigen defines several typedef shortcuts for most common matrix and vector types.
|
||||
*
|
||||
* The general patterns are the following:
|
||||
*
|
||||
* \c MatrixSizeType where \c Size can be \c 2,\c 3,\c 4 for fixed size square matrices or \c X for dynamic size,
|
||||
* and where \c Type can be \c i for integer, \c f for float, \c d for double, \c cf for complex float, \c cd
|
||||
* for complex double.
|
||||
*
|
||||
* For example, \c Matrix3d is a fixed-size 3x3 matrix type of doubles, and \c MatrixXf is a dynamic-size matrix of floats.
|
||||
*
|
||||
* There are also \c VectorSizeType and \c RowVectorSizeType which are self-explanatory. For example, \c Vector4cf is
|
||||
* a fixed-size vector of 4 complex floats.
|
||||
*
|
||||
* \sa class Matrix
|
||||
*/
|
||||
|
||||
#define EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix) \
|
||||
/** \ingroup matrixtypedefs */ \
|
||||
typedef Matrix<Type, Size, Size> Matrix##SizeSuffix##TypeSuffix; \
|
||||
/** \ingroup matrixtypedefs */ \
|
||||
typedef Matrix<Type, Size, 1> Vector##SizeSuffix##TypeSuffix; \
|
||||
/** \ingroup matrixtypedefs */ \
|
||||
typedef Matrix<Type, 1, Size> RowVector##SizeSuffix##TypeSuffix;
|
||||
|
||||
#define EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, Size) \
|
||||
/** \ingroup matrixtypedefs */ \
|
||||
typedef Matrix<Type, Size, Dynamic> Matrix##Size##X##TypeSuffix; \
|
||||
/** \ingroup matrixtypedefs */ \
|
||||
typedef Matrix<Type, Dynamic, Size> Matrix##X##Size##TypeSuffix;
|
||||
|
||||
#define EIGEN_MAKE_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \
|
||||
EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 2, 2) \
|
||||
EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 3, 3) \
|
||||
EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 4, 4) \
|
||||
EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Dynamic, X) \
|
||||
EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 2) \
|
||||
EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 3) \
|
||||
EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 4)
|
||||
|
||||
EIGEN_MAKE_TYPEDEFS_ALL_SIZES(int, i)
|
||||
EIGEN_MAKE_TYPEDEFS_ALL_SIZES(float, f)
|
||||
EIGEN_MAKE_TYPEDEFS_ALL_SIZES(double, d)
|
||||
EIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex<float>, cf)
|
||||
EIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex<double>, cd)
|
||||
|
||||
#undef EIGEN_MAKE_TYPEDEFS_ALL_SIZES
|
||||
#undef EIGEN_MAKE_TYPEDEFS
|
||||
#undef EIGEN_MAKE_FIXED_TYPEDEFS
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_MATRIX_H
|
||||
529
external/include/eigen3/Eigen/src/Core/MatrixBase.h
vendored
Normal file
529
external/include/eigen3/Eigen/src/Core/MatrixBase.h
vendored
Normal file
@@ -0,0 +1,529 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_MATRIXBASE_H
|
||||
#define EIGEN_MATRIXBASE_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class MatrixBase
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Base class for all dense matrices, vectors, and expressions
|
||||
*
|
||||
* This class is the base that is inherited by all matrix, vector, and related expression
|
||||
* types. Most of the Eigen API is contained in this class, and its base classes. Other important
|
||||
* classes for the Eigen API are Matrix, and VectorwiseOp.
|
||||
*
|
||||
* Note that some methods are defined in other modules such as the \ref LU_Module LU module
|
||||
* for all functions related to matrix inversions.
|
||||
*
|
||||
* \tparam Derived is the derived type, e.g. a matrix type, or an expression, etc.
|
||||
*
|
||||
* When writing a function taking Eigen objects as argument, if you want your function
|
||||
* to take as argument any matrix, vector, or expression, just let it take a
|
||||
* MatrixBase argument. As an example, here is a function printFirstRow which, given
|
||||
* a matrix, vector, or expression \a x, prints the first row of \a x.
|
||||
*
|
||||
* \code
|
||||
template<typename Derived>
|
||||
void printFirstRow(const Eigen::MatrixBase<Derived>& x)
|
||||
{
|
||||
cout << x.row(0) << endl;
|
||||
}
|
||||
* \endcode
|
||||
*
|
||||
* This class can be extended with the help of the plugin mechanism described on the page
|
||||
* \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_MATRIXBASE_PLUGIN.
|
||||
*
|
||||
* \sa \blank \ref TopicClassHierarchy
|
||||
*/
|
||||
template<typename Derived> class MatrixBase
|
||||
: public DenseBase<Derived>
|
||||
{
|
||||
public:
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
typedef MatrixBase StorageBaseType;
|
||||
typedef typename internal::traits<Derived>::StorageKind StorageKind;
|
||||
typedef typename internal::traits<Derived>::StorageIndex StorageIndex;
|
||||
typedef typename internal::traits<Derived>::Scalar Scalar;
|
||||
typedef typename internal::packet_traits<Scalar>::type PacketScalar;
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
|
||||
typedef DenseBase<Derived> Base;
|
||||
using Base::RowsAtCompileTime;
|
||||
using Base::ColsAtCompileTime;
|
||||
using Base::SizeAtCompileTime;
|
||||
using Base::MaxRowsAtCompileTime;
|
||||
using Base::MaxColsAtCompileTime;
|
||||
using Base::MaxSizeAtCompileTime;
|
||||
using Base::IsVectorAtCompileTime;
|
||||
using Base::Flags;
|
||||
|
||||
using Base::derived;
|
||||
using Base::const_cast_derived;
|
||||
using Base::rows;
|
||||
using Base::cols;
|
||||
using Base::size;
|
||||
using Base::coeff;
|
||||
using Base::coeffRef;
|
||||
using Base::lazyAssign;
|
||||
using Base::eval;
|
||||
using Base::operator+=;
|
||||
using Base::operator-=;
|
||||
using Base::operator*=;
|
||||
using Base::operator/=;
|
||||
|
||||
typedef typename Base::CoeffReturnType CoeffReturnType;
|
||||
typedef typename Base::ConstTransposeReturnType ConstTransposeReturnType;
|
||||
typedef typename Base::RowXpr RowXpr;
|
||||
typedef typename Base::ColXpr ColXpr;
|
||||
#endif // not EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** type of the equivalent square matrix */
|
||||
typedef Matrix<Scalar,EIGEN_SIZE_MAX(RowsAtCompileTime,ColsAtCompileTime),
|
||||
EIGEN_SIZE_MAX(RowsAtCompileTime,ColsAtCompileTime)> SquareMatrixType;
|
||||
#endif // not EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
/** \returns the size of the main diagonal, which is min(rows(),cols()).
|
||||
* \sa rows(), cols(), SizeAtCompileTime. */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index diagonalSize() const { return (numext::mini)(rows(),cols()); }
|
||||
|
||||
typedef typename Base::PlainObject PlainObject;
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** \internal Represents a matrix with all coefficients equal to one another*/
|
||||
typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
|
||||
/** \internal the return type of MatrixBase::adjoint() */
|
||||
typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
|
||||
CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, ConstTransposeReturnType>,
|
||||
ConstTransposeReturnType
|
||||
>::type AdjointReturnType;
|
||||
/** \internal Return type of eigenvalues() */
|
||||
typedef Matrix<std::complex<RealScalar>, internal::traits<Derived>::ColsAtCompileTime, 1, ColMajor> EigenvaluesReturnType;
|
||||
/** \internal the return type of identity */
|
||||
typedef CwiseNullaryOp<internal::scalar_identity_op<Scalar>,PlainObject> IdentityReturnType;
|
||||
/** \internal the return type of unit vectors */
|
||||
typedef Block<const CwiseNullaryOp<internal::scalar_identity_op<Scalar>, SquareMatrixType>,
|
||||
internal::traits<Derived>::RowsAtCompileTime,
|
||||
internal::traits<Derived>::ColsAtCompileTime> BasisReturnType;
|
||||
#endif // not EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::MatrixBase
|
||||
#define EIGEN_DOC_UNARY_ADDONS(X,Y)
|
||||
# include "../plugins/CommonCwiseUnaryOps.h"
|
||||
# include "../plugins/CommonCwiseBinaryOps.h"
|
||||
# include "../plugins/MatrixCwiseUnaryOps.h"
|
||||
# include "../plugins/MatrixCwiseBinaryOps.h"
|
||||
# ifdef EIGEN_MATRIXBASE_PLUGIN
|
||||
# include EIGEN_MATRIXBASE_PLUGIN
|
||||
# endif
|
||||
#undef EIGEN_CURRENT_STORAGE_BASE_CLASS
|
||||
#undef EIGEN_DOC_UNARY_ADDONS
|
||||
|
||||
/** Special case of the template operator=, in order to prevent the compiler
|
||||
* from generating a default operator= (issue hit with g++ 4.1)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator=(const MatrixBase& other);
|
||||
|
||||
// We cannot inherit here via Base::operator= since it is causing
|
||||
// trouble with MSVC.
|
||||
|
||||
template <typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator=(const DenseBase<OtherDerived>& other);
|
||||
|
||||
template <typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator=(const EigenBase<OtherDerived>& other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& operator=(const ReturnByValue<OtherDerived>& other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator+=(const MatrixBase<OtherDerived>& other);
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Derived& operator-=(const MatrixBase<OtherDerived>& other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Product<Derived,OtherDerived>
|
||||
operator*(const MatrixBase<OtherDerived> &other) const;
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Product<Derived,OtherDerived,LazyProduct>
|
||||
lazyProduct(const MatrixBase<OtherDerived> &other) const;
|
||||
|
||||
template<typename OtherDerived>
|
||||
Derived& operator*=(const EigenBase<OtherDerived>& other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
void applyOnTheLeft(const EigenBase<OtherDerived>& other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
void applyOnTheRight(const EigenBase<OtherDerived>& other);
|
||||
|
||||
template<typename DiagonalDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Product<Derived, DiagonalDerived, LazyProduct>
|
||||
operator*(const DiagonalBase<DiagonalDerived> &diagonal) const;
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
|
||||
dot(const MatrixBase<OtherDerived>& other) const;
|
||||
|
||||
EIGEN_DEVICE_FUNC RealScalar squaredNorm() const;
|
||||
EIGEN_DEVICE_FUNC RealScalar norm() const;
|
||||
RealScalar stableNorm() const;
|
||||
RealScalar blueNorm() const;
|
||||
RealScalar hypotNorm() const;
|
||||
EIGEN_DEVICE_FUNC const PlainObject normalized() const;
|
||||
EIGEN_DEVICE_FUNC const PlainObject stableNormalized() const;
|
||||
EIGEN_DEVICE_FUNC void normalize();
|
||||
EIGEN_DEVICE_FUNC void stableNormalize();
|
||||
|
||||
EIGEN_DEVICE_FUNC const AdjointReturnType adjoint() const;
|
||||
EIGEN_DEVICE_FUNC void adjointInPlace();
|
||||
|
||||
typedef Diagonal<Derived> DiagonalReturnType;
|
||||
EIGEN_DEVICE_FUNC
|
||||
DiagonalReturnType diagonal();
|
||||
|
||||
typedef typename internal::add_const<Diagonal<const Derived> >::type ConstDiagonalReturnType;
|
||||
EIGEN_DEVICE_FUNC
|
||||
ConstDiagonalReturnType diagonal() const;
|
||||
|
||||
template<int Index> struct DiagonalIndexReturnType { typedef Diagonal<Derived,Index> Type; };
|
||||
template<int Index> struct ConstDiagonalIndexReturnType { typedef const Diagonal<const Derived,Index> Type; };
|
||||
|
||||
template<int Index>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename DiagonalIndexReturnType<Index>::Type diagonal();
|
||||
|
||||
template<int Index>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename ConstDiagonalIndexReturnType<Index>::Type diagonal() const;
|
||||
|
||||
typedef Diagonal<Derived,DynamicIndex> DiagonalDynamicIndexReturnType;
|
||||
typedef typename internal::add_const<Diagonal<const Derived,DynamicIndex> >::type ConstDiagonalDynamicIndexReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
DiagonalDynamicIndexReturnType diagonal(Index index);
|
||||
EIGEN_DEVICE_FUNC
|
||||
ConstDiagonalDynamicIndexReturnType diagonal(Index index) const;
|
||||
|
||||
template<unsigned int Mode> struct TriangularViewReturnType { typedef TriangularView<Derived, Mode> Type; };
|
||||
template<unsigned int Mode> struct ConstTriangularViewReturnType { typedef const TriangularView<const Derived, Mode> Type; };
|
||||
|
||||
template<unsigned int Mode>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename TriangularViewReturnType<Mode>::Type triangularView();
|
||||
template<unsigned int Mode>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename ConstTriangularViewReturnType<Mode>::Type triangularView() const;
|
||||
|
||||
template<unsigned int UpLo> struct SelfAdjointViewReturnType { typedef SelfAdjointView<Derived, UpLo> Type; };
|
||||
template<unsigned int UpLo> struct ConstSelfAdjointViewReturnType { typedef const SelfAdjointView<const Derived, UpLo> Type; };
|
||||
|
||||
template<unsigned int UpLo>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename SelfAdjointViewReturnType<UpLo>::Type selfadjointView();
|
||||
template<unsigned int UpLo>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename ConstSelfAdjointViewReturnType<UpLo>::Type selfadjointView() const;
|
||||
|
||||
const SparseView<Derived> sparseView(const Scalar& m_reference = Scalar(0),
|
||||
const typename NumTraits<Scalar>::Real& m_epsilon = NumTraits<Scalar>::dummy_precision()) const;
|
||||
EIGEN_DEVICE_FUNC static const IdentityReturnType Identity();
|
||||
EIGEN_DEVICE_FUNC static const IdentityReturnType Identity(Index rows, Index cols);
|
||||
EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index size, Index i);
|
||||
EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index i);
|
||||
EIGEN_DEVICE_FUNC static const BasisReturnType UnitX();
|
||||
EIGEN_DEVICE_FUNC static const BasisReturnType UnitY();
|
||||
EIGEN_DEVICE_FUNC static const BasisReturnType UnitZ();
|
||||
EIGEN_DEVICE_FUNC static const BasisReturnType UnitW();
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const DiagonalWrapper<const Derived> asDiagonal() const;
|
||||
const PermutationWrapper<const Derived> asPermutation() const;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& setIdentity();
|
||||
EIGEN_DEVICE_FUNC
|
||||
Derived& setIdentity(Index rows, Index cols);
|
||||
|
||||
bool isIdentity(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
bool isDiagonal(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
|
||||
bool isUpperTriangular(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
bool isLowerTriangular(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
|
||||
template<typename OtherDerived>
|
||||
bool isOrthogonal(const MatrixBase<OtherDerived>& other,
|
||||
const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
bool isUnitary(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
|
||||
|
||||
/** \returns true if each coefficients of \c *this and \a other are all exactly equal.
|
||||
* \warning When using floating point scalar values you probably should rather use a
|
||||
* fuzzy comparison such as isApprox()
|
||||
* \sa isApprox(), operator!= */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC inline bool operator==(const MatrixBase<OtherDerived>& other) const
|
||||
{ return cwiseEqual(other).all(); }
|
||||
|
||||
/** \returns true if at least one pair of coefficients of \c *this and \a other are not exactly equal to each other.
|
||||
* \warning When using floating point scalar values you probably should rather use a
|
||||
* fuzzy comparison such as isApprox()
|
||||
* \sa isApprox(), operator== */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC inline bool operator!=(const MatrixBase<OtherDerived>& other) const
|
||||
{ return cwiseNotEqual(other).any(); }
|
||||
|
||||
NoAlias<Derived,Eigen::MatrixBase > noalias();
|
||||
|
||||
// TODO forceAlignedAccess is temporarily disabled
|
||||
// Need to find a nicer workaround.
|
||||
inline const Derived& forceAlignedAccess() const { return derived(); }
|
||||
inline Derived& forceAlignedAccess() { return derived(); }
|
||||
template<bool Enable> inline const Derived& forceAlignedAccessIf() const { return derived(); }
|
||||
template<bool Enable> inline Derived& forceAlignedAccessIf() { return derived(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC Scalar trace() const;
|
||||
|
||||
template<int p> EIGEN_DEVICE_FUNC RealScalar lpNorm() const;
|
||||
|
||||
EIGEN_DEVICE_FUNC MatrixBase<Derived>& matrix() { return *this; }
|
||||
EIGEN_DEVICE_FUNC const MatrixBase<Derived>& matrix() const { return *this; }
|
||||
|
||||
/** \returns an \link Eigen::ArrayBase Array \endlink expression of this matrix
|
||||
* \sa ArrayBase::matrix() */
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ArrayWrapper<Derived> array() { return ArrayWrapper<Derived>(derived()); }
|
||||
/** \returns a const \link Eigen::ArrayBase Array \endlink expression of this matrix
|
||||
* \sa ArrayBase::matrix() */
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ArrayWrapper<const Derived> array() const { return ArrayWrapper<const Derived>(derived()); }
|
||||
|
||||
/////////// LU module ///////////
|
||||
|
||||
inline const FullPivLU<PlainObject> fullPivLu() const;
|
||||
inline const PartialPivLU<PlainObject> partialPivLu() const;
|
||||
|
||||
inline const PartialPivLU<PlainObject> lu() const;
|
||||
|
||||
inline const Inverse<Derived> inverse() const;
|
||||
|
||||
template<typename ResultType>
|
||||
inline void computeInverseAndDetWithCheck(
|
||||
ResultType& inverse,
|
||||
typename ResultType::Scalar& determinant,
|
||||
bool& invertible,
|
||||
const RealScalar& absDeterminantThreshold = NumTraits<Scalar>::dummy_precision()
|
||||
) const;
|
||||
template<typename ResultType>
|
||||
inline void computeInverseWithCheck(
|
||||
ResultType& inverse,
|
||||
bool& invertible,
|
||||
const RealScalar& absDeterminantThreshold = NumTraits<Scalar>::dummy_precision()
|
||||
) const;
|
||||
Scalar determinant() const;
|
||||
|
||||
/////////// Cholesky module ///////////
|
||||
|
||||
inline const LLT<PlainObject> llt() const;
|
||||
inline const LDLT<PlainObject> ldlt() const;
|
||||
|
||||
/////////// QR module ///////////
|
||||
|
||||
inline const HouseholderQR<PlainObject> householderQr() const;
|
||||
inline const ColPivHouseholderQR<PlainObject> colPivHouseholderQr() const;
|
||||
inline const FullPivHouseholderQR<PlainObject> fullPivHouseholderQr() const;
|
||||
inline const CompleteOrthogonalDecomposition<PlainObject> completeOrthogonalDecomposition() const;
|
||||
|
||||
/////////// Eigenvalues module ///////////
|
||||
|
||||
inline EigenvaluesReturnType eigenvalues() const;
|
||||
inline RealScalar operatorNorm() const;
|
||||
|
||||
/////////// SVD module ///////////
|
||||
|
||||
inline JacobiSVD<PlainObject> jacobiSvd(unsigned int computationOptions = 0) const;
|
||||
inline BDCSVD<PlainObject> bdcSvd(unsigned int computationOptions = 0) const;
|
||||
|
||||
/////////// Geometry module ///////////
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
/// \internal helper struct to form the return type of the cross product
|
||||
template<typename OtherDerived> struct cross_product_return_type {
|
||||
typedef typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType Scalar;
|
||||
typedef Matrix<Scalar,MatrixBase::RowsAtCompileTime,MatrixBase::ColsAtCompileTime> type;
|
||||
};
|
||||
#endif // EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
inline typename cross_product_return_type<OtherDerived>::type
|
||||
#else
|
||||
inline PlainObject
|
||||
#endif
|
||||
cross(const MatrixBase<OtherDerived>& other) const;
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline PlainObject cross3(const MatrixBase<OtherDerived>& other) const;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline PlainObject unitOrthogonal(void) const;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const;
|
||||
|
||||
// put this as separate enum value to work around possible GCC 4.3 bug (?)
|
||||
enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits<Derived>::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical)
|
||||
: ColsAtCompileTime==1 ? Vertical : Horizontal };
|
||||
typedef Homogeneous<Derived, HomogeneousReturnTypeDirection> HomogeneousReturnType;
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline HomogeneousReturnType homogeneous() const;
|
||||
|
||||
enum {
|
||||
SizeMinusOne = SizeAtCompileTime==Dynamic ? Dynamic : SizeAtCompileTime-1
|
||||
};
|
||||
typedef Block<const Derived,
|
||||
internal::traits<Derived>::ColsAtCompileTime==1 ? SizeMinusOne : 1,
|
||||
internal::traits<Derived>::ColsAtCompileTime==1 ? 1 : SizeMinusOne> ConstStartMinusOne;
|
||||
typedef EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(ConstStartMinusOne,Scalar,quotient) HNormalizedReturnType;
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const HNormalizedReturnType hnormalized() const;
|
||||
|
||||
////////// Householder module ///////////
|
||||
|
||||
void makeHouseholderInPlace(Scalar& tau, RealScalar& beta);
|
||||
template<typename EssentialPart>
|
||||
void makeHouseholder(EssentialPart& essential,
|
||||
Scalar& tau, RealScalar& beta) const;
|
||||
template<typename EssentialPart>
|
||||
void applyHouseholderOnTheLeft(const EssentialPart& essential,
|
||||
const Scalar& tau,
|
||||
Scalar* workspace);
|
||||
template<typename EssentialPart>
|
||||
void applyHouseholderOnTheRight(const EssentialPart& essential,
|
||||
const Scalar& tau,
|
||||
Scalar* workspace);
|
||||
|
||||
///////// Jacobi module /////////
|
||||
|
||||
template<typename OtherScalar>
|
||||
void applyOnTheLeft(Index p, Index q, const JacobiRotation<OtherScalar>& j);
|
||||
template<typename OtherScalar>
|
||||
void applyOnTheRight(Index p, Index q, const JacobiRotation<OtherScalar>& j);
|
||||
|
||||
///////// SparseCore module /////////
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE const typename SparseMatrixBase<OtherDerived>::template CwiseProductDenseReturnType<Derived>::Type
|
||||
cwiseProduct(const SparseMatrixBase<OtherDerived> &other) const
|
||||
{
|
||||
return other.cwiseProduct(derived());
|
||||
}
|
||||
|
||||
///////// MatrixFunctions module /////////
|
||||
|
||||
typedef typename internal::stem_function<Scalar>::type StemFunction;
|
||||
#define EIGEN_MATRIX_FUNCTION(ReturnType, Name, Description) \
|
||||
/** \returns an expression of the matrix Description of \c *this. \brief This function requires the <a href="unsupported/group__MatrixFunctions__Module.html"> unsupported MatrixFunctions module</a>. To compute the coefficient-wise Description use ArrayBase::##Name . */ \
|
||||
const ReturnType<Derived> Name() const;
|
||||
#define EIGEN_MATRIX_FUNCTION_1(ReturnType, Name, Description, Argument) \
|
||||
/** \returns an expression of the matrix Description of \c *this. \brief This function requires the <a href="unsupported/group__MatrixFunctions__Module.html"> unsupported MatrixFunctions module</a>. To compute the coefficient-wise Description use ArrayBase::##Name . */ \
|
||||
const ReturnType<Derived> Name(Argument) const;
|
||||
|
||||
EIGEN_MATRIX_FUNCTION(MatrixExponentialReturnValue, exp, exponential)
|
||||
/** \brief Helper function for the <a href="unsupported/group__MatrixFunctions__Module.html"> unsupported MatrixFunctions module</a>.*/
|
||||
const MatrixFunctionReturnValue<Derived> matrixFunction(StemFunction f) const;
|
||||
EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, cosh, hyperbolic cosine)
|
||||
EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, sinh, hyperbolic sine)
|
||||
EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, cos, cosine)
|
||||
EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, sin, sine)
|
||||
EIGEN_MATRIX_FUNCTION(MatrixSquareRootReturnValue, sqrt, square root)
|
||||
EIGEN_MATRIX_FUNCTION(MatrixLogarithmReturnValue, log, logarithm)
|
||||
EIGEN_MATRIX_FUNCTION_1(MatrixPowerReturnValue, pow, power to \c p, const RealScalar& p)
|
||||
EIGEN_MATRIX_FUNCTION_1(MatrixComplexPowerReturnValue, pow, power to \c p, const std::complex<RealScalar>& p)
|
||||
|
||||
protected:
|
||||
EIGEN_DEVICE_FUNC MatrixBase() : Base() {}
|
||||
|
||||
private:
|
||||
EIGEN_DEVICE_FUNC explicit MatrixBase(int);
|
||||
EIGEN_DEVICE_FUNC MatrixBase(int,int);
|
||||
template<typename OtherDerived> EIGEN_DEVICE_FUNC explicit MatrixBase(const MatrixBase<OtherDerived>&);
|
||||
protected:
|
||||
// mixing arrays and matrices is not legal
|
||||
template<typename OtherDerived> Derived& operator+=(const ArrayBase<OtherDerived>& )
|
||||
{EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
|
||||
// mixing arrays and matrices is not legal
|
||||
template<typename OtherDerived> Derived& operator-=(const ArrayBase<OtherDerived>& )
|
||||
{EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
|
||||
};
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
* Implementation of matrix base methods
|
||||
***************************************************************************/
|
||||
|
||||
/** replaces \c *this by \c *this * \a other.
|
||||
*
|
||||
* \returns a reference to \c *this
|
||||
*
|
||||
* Example: \include MatrixBase_applyOnTheRight.cpp
|
||||
* Output: \verbinclude MatrixBase_applyOnTheRight.out
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
inline Derived&
|
||||
MatrixBase<Derived>::operator*=(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
other.derived().applyThisOnTheRight(derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** replaces \c *this by \c *this * \a other. It is equivalent to MatrixBase::operator*=().
|
||||
*
|
||||
* Example: \include MatrixBase_applyOnTheRight.cpp
|
||||
* Output: \verbinclude MatrixBase_applyOnTheRight.out
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
inline void MatrixBase<Derived>::applyOnTheRight(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
other.derived().applyThisOnTheRight(derived());
|
||||
}
|
||||
|
||||
/** replaces \c *this by \a other * \c *this.
|
||||
*
|
||||
* Example: \include MatrixBase_applyOnTheLeft.cpp
|
||||
* Output: \verbinclude MatrixBase_applyOnTheLeft.out
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
inline void MatrixBase<Derived>::applyOnTheLeft(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
other.derived().applyThisOnTheLeft(derived());
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_MATRIXBASE_H
|
||||
110
external/include/eigen3/Eigen/src/Core/NestByValue.h
vendored
Normal file
110
external/include/eigen3/Eigen/src/Core/NestByValue.h
vendored
Normal file
@@ -0,0 +1,110 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_NESTBYVALUE_H
|
||||
#define EIGEN_NESTBYVALUE_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
template<typename ExpressionType>
|
||||
struct traits<NestByValue<ExpressionType> > : public traits<ExpressionType>
|
||||
{};
|
||||
}
|
||||
|
||||
/** \class NestByValue
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression which must be nested by value
|
||||
*
|
||||
* \tparam ExpressionType the type of the object of which we are requiring nesting-by-value
|
||||
*
|
||||
* This class is the return type of MatrixBase::nestByValue()
|
||||
* and most of the time this is the only way it is used.
|
||||
*
|
||||
* \sa MatrixBase::nestByValue()
|
||||
*/
|
||||
template<typename ExpressionType> class NestByValue
|
||||
: public internal::dense_xpr_base< NestByValue<ExpressionType> >::type
|
||||
{
|
||||
public:
|
||||
|
||||
typedef typename internal::dense_xpr_base<NestByValue>::type Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(NestByValue)
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); }
|
||||
EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); }
|
||||
EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); }
|
||||
EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const
|
||||
{
|
||||
return m_expression.coeff(row, col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col)
|
||||
{
|
||||
return m_expression.const_cast_derived().coeffRef(row, col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
return m_expression.coeff(index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index)
|
||||
{
|
||||
return m_expression.const_cast_derived().coeffRef(index);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline const PacketScalar packet(Index row, Index col) const
|
||||
{
|
||||
return m_expression.template packet<LoadMode>(row, col);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline void writePacket(Index row, Index col, const PacketScalar& x)
|
||||
{
|
||||
m_expression.const_cast_derived().template writePacket<LoadMode>(row, col, x);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline const PacketScalar packet(Index index) const
|
||||
{
|
||||
return m_expression.template packet<LoadMode>(index);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
inline void writePacket(Index index, const PacketScalar& x)
|
||||
{
|
||||
m_expression.const_cast_derived().template writePacket<LoadMode>(index, x);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; }
|
||||
|
||||
protected:
|
||||
const ExpressionType m_expression;
|
||||
};
|
||||
|
||||
/** \returns an expression of the temporary version of *this.
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const NestByValue<Derived>
|
||||
DenseBase<Derived>::nestByValue() const
|
||||
{
|
||||
return NestByValue<Derived>(derived());
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_NESTBYVALUE_H
|
||||
108
external/include/eigen3/Eigen/src/Core/NoAlias.h
vendored
Normal file
108
external/include/eigen3/Eigen/src/Core/NoAlias.h
vendored
Normal file
@@ -0,0 +1,108 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_NOALIAS_H
|
||||
#define EIGEN_NOALIAS_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class NoAlias
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Pseudo expression providing an operator = assuming no aliasing
|
||||
*
|
||||
* \tparam ExpressionType the type of the object on which to do the lazy assignment
|
||||
*
|
||||
* This class represents an expression with special assignment operators
|
||||
* assuming no aliasing between the target expression and the source expression.
|
||||
* More precisely it alloas to bypass the EvalBeforeAssignBit flag of the source expression.
|
||||
* It is the return type of MatrixBase::noalias()
|
||||
* and most of the time this is the only way it is used.
|
||||
*
|
||||
* \sa MatrixBase::noalias()
|
||||
*/
|
||||
template<typename ExpressionType, template <typename> class StorageBase>
|
||||
class NoAlias
|
||||
{
|
||||
public:
|
||||
typedef typename ExpressionType::Scalar Scalar;
|
||||
|
||||
explicit NoAlias(ExpressionType& expression) : m_expression(expression) {}
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase<OtherDerived>& other)
|
||||
{
|
||||
call_assignment_no_alias(m_expression, other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
return m_expression;
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase<OtherDerived>& other)
|
||||
{
|
||||
call_assignment_no_alias(m_expression, other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
return m_expression;
|
||||
}
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase<OtherDerived>& other)
|
||||
{
|
||||
call_assignment_no_alias(m_expression, other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
return m_expression;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
ExpressionType& expression() const
|
||||
{
|
||||
return m_expression;
|
||||
}
|
||||
|
||||
protected:
|
||||
ExpressionType& m_expression;
|
||||
};
|
||||
|
||||
/** \returns a pseudo expression of \c *this with an operator= assuming
|
||||
* no aliasing between \c *this and the source expression.
|
||||
*
|
||||
* More precisely, noalias() allows to bypass the EvalBeforeAssignBit flag.
|
||||
* Currently, even though several expressions may alias, only product
|
||||
* expressions have this flag. Therefore, noalias() is only usefull when
|
||||
* the source expression contains a matrix product.
|
||||
*
|
||||
* Here are some examples where noalias is usefull:
|
||||
* \code
|
||||
* D.noalias() = A * B;
|
||||
* D.noalias() += A.transpose() * B;
|
||||
* D.noalias() -= 2 * A * B.adjoint();
|
||||
* \endcode
|
||||
*
|
||||
* On the other hand the following example will lead to a \b wrong result:
|
||||
* \code
|
||||
* A.noalias() = A * B;
|
||||
* \endcode
|
||||
* because the result matrix A is also an operand of the matrix product. Therefore,
|
||||
* there is no alternative than evaluating A * B in a temporary, that is the default
|
||||
* behavior when you write:
|
||||
* \code
|
||||
* A = A * B;
|
||||
* \endcode
|
||||
*
|
||||
* \sa class NoAlias
|
||||
*/
|
||||
template<typename Derived>
|
||||
NoAlias<Derived,MatrixBase> MatrixBase<Derived>::noalias()
|
||||
{
|
||||
return NoAlias<Derived, Eigen::MatrixBase >(derived());
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_NOALIAS_H
|
||||
248
external/include/eigen3/Eigen/src/Core/NumTraits.h
vendored
Normal file
248
external/include/eigen3/Eigen/src/Core/NumTraits.h
vendored
Normal file
@@ -0,0 +1,248 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_NUMTRAITS_H
|
||||
#define EIGEN_NUMTRAITS_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
// default implementation of digits10(), based on numeric_limits if specialized,
|
||||
// 0 for integer types, and log10(epsilon()) otherwise.
|
||||
template< typename T,
|
||||
bool use_numeric_limits = std::numeric_limits<T>::is_specialized,
|
||||
bool is_integer = NumTraits<T>::IsInteger>
|
||||
struct default_digits10_impl
|
||||
{
|
||||
static int run() { return std::numeric_limits<T>::digits10; }
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct default_digits10_impl<T,false,false> // Floating point
|
||||
{
|
||||
static int run() {
|
||||
using std::log10;
|
||||
using std::ceil;
|
||||
typedef typename NumTraits<T>::Real Real;
|
||||
return int(ceil(-log10(NumTraits<Real>::epsilon())));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct default_digits10_impl<T,false,true> // Integer
|
||||
{
|
||||
static int run() { return 0; }
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \class NumTraits
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Holds information about the various numeric (i.e. scalar) types allowed by Eigen.
|
||||
*
|
||||
* \tparam T the numeric type at hand
|
||||
*
|
||||
* This class stores enums, typedefs and static methods giving information about a numeric type.
|
||||
*
|
||||
* The provided data consists of:
|
||||
* \li A typedef \c Real, giving the "real part" type of \a T. If \a T is already real,
|
||||
* then \c Real is just a typedef to \a T. If \a T is \c std::complex<U> then \c Real
|
||||
* is a typedef to \a U.
|
||||
* \li A typedef \c NonInteger, giving the type that should be used for operations producing non-integral values,
|
||||
* such as quotients, square roots, etc. If \a T is a floating-point type, then this typedef just gives
|
||||
* \a T again. Note however that many Eigen functions such as internal::sqrt simply refuse to
|
||||
* take integers. Outside of a few cases, Eigen doesn't do automatic type promotion. Thus, this typedef is
|
||||
* only intended as a helper for code that needs to explicitly promote types.
|
||||
* \li A typedef \c Literal giving the type to use for numeric literals such as "2" or "0.5". For instance, for \c std::complex<U>, Literal is defined as \c U.
|
||||
* Of course, this type must be fully compatible with \a T. In doubt, just use \a T here.
|
||||
* \li A typedef \a Nested giving the type to use to nest a value inside of the expression tree. If you don't know what
|
||||
* this means, just use \a T here.
|
||||
* \li An enum value \a IsComplex. It is equal to 1 if \a T is a \c std::complex
|
||||
* type, and to 0 otherwise.
|
||||
* \li An enum value \a IsInteger. It is equal to \c 1 if \a T is an integer type such as \c int,
|
||||
* and to \c 0 otherwise.
|
||||
* \li Enum values ReadCost, AddCost and MulCost representing a rough estimate of the number of CPU cycles needed
|
||||
* to by move / add / mul instructions respectively, assuming the data is already stored in CPU registers.
|
||||
* Stay vague here. No need to do architecture-specific stuff.
|
||||
* \li An enum value \a IsSigned. It is equal to \c 1 if \a T is a signed type and to 0 if \a T is unsigned.
|
||||
* \li An enum value \a RequireInitialization. It is equal to \c 1 if the constructor of the numeric type \a T must
|
||||
* be called, and to 0 if it is safe not to call it. Default is 0 if \a T is an arithmetic type, and 1 otherwise.
|
||||
* \li An epsilon() function which, unlike <a href="http://en.cppreference.com/w/cpp/types/numeric_limits/epsilon">std::numeric_limits::epsilon()</a>,
|
||||
* it returns a \a Real instead of a \a T.
|
||||
* \li A dummy_precision() function returning a weak epsilon value. It is mainly used as a default
|
||||
* value by the fuzzy comparison operators.
|
||||
* \li highest() and lowest() functions returning the highest and lowest possible values respectively.
|
||||
* \li digits10() function returning the number of decimal digits that can be represented without change. This is
|
||||
* the analogue of <a href="http://en.cppreference.com/w/cpp/types/numeric_limits/digits10">std::numeric_limits<T>::digits10</a>
|
||||
* which is used as the default implementation if specialized.
|
||||
*/
|
||||
|
||||
template<typename T> struct GenericNumTraits
|
||||
{
|
||||
enum {
|
||||
IsInteger = std::numeric_limits<T>::is_integer,
|
||||
IsSigned = std::numeric_limits<T>::is_signed,
|
||||
IsComplex = 0,
|
||||
RequireInitialization = internal::is_arithmetic<T>::value ? 0 : 1,
|
||||
ReadCost = 1,
|
||||
AddCost = 1,
|
||||
MulCost = 1
|
||||
};
|
||||
|
||||
typedef T Real;
|
||||
typedef typename internal::conditional<
|
||||
IsInteger,
|
||||
typename internal::conditional<sizeof(T)<=2, float, double>::type,
|
||||
T
|
||||
>::type NonInteger;
|
||||
typedef T Nested;
|
||||
typedef T Literal;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline Real epsilon()
|
||||
{
|
||||
return numext::numeric_limits<T>::epsilon();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline int digits10()
|
||||
{
|
||||
return internal::default_digits10_impl<T>::run();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline Real dummy_precision()
|
||||
{
|
||||
// make sure to override this for floating-point types
|
||||
return Real(0);
|
||||
}
|
||||
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline T highest() {
|
||||
return (numext::numeric_limits<T>::max)();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline T lowest() {
|
||||
return IsInteger ? (numext::numeric_limits<T>::min)() : (-(numext::numeric_limits<T>::max)());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline T infinity() {
|
||||
return numext::numeric_limits<T>::infinity();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline T quiet_NaN() {
|
||||
return numext::numeric_limits<T>::quiet_NaN();
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T> struct NumTraits : GenericNumTraits<T>
|
||||
{};
|
||||
|
||||
template<> struct NumTraits<float>
|
||||
: GenericNumTraits<float>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline float dummy_precision() { return 1e-5f; }
|
||||
};
|
||||
|
||||
template<> struct NumTraits<double> : GenericNumTraits<double>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline double dummy_precision() { return 1e-12; }
|
||||
};
|
||||
|
||||
template<> struct NumTraits<long double>
|
||||
: GenericNumTraits<long double>
|
||||
{
|
||||
static inline long double dummy_precision() { return 1e-15l; }
|
||||
};
|
||||
|
||||
template<typename _Real> struct NumTraits<std::complex<_Real> >
|
||||
: GenericNumTraits<std::complex<_Real> >
|
||||
{
|
||||
typedef _Real Real;
|
||||
typedef typename NumTraits<_Real>::Literal Literal;
|
||||
enum {
|
||||
IsComplex = 1,
|
||||
RequireInitialization = NumTraits<_Real>::RequireInitialization,
|
||||
ReadCost = 2 * NumTraits<_Real>::ReadCost,
|
||||
AddCost = 2 * NumTraits<Real>::AddCost,
|
||||
MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline Real epsilon() { return NumTraits<Real>::epsilon(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline Real dummy_precision() { return NumTraits<Real>::dummy_precision(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline int digits10() { return NumTraits<Real>::digits10(); }
|
||||
};
|
||||
|
||||
template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
|
||||
struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
|
||||
{
|
||||
typedef Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> ArrayType;
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
typedef Array<RealScalar, Rows, Cols, Options, MaxRows, MaxCols> Real;
|
||||
typedef typename NumTraits<Scalar>::NonInteger NonIntegerScalar;
|
||||
typedef Array<NonIntegerScalar, Rows, Cols, Options, MaxRows, MaxCols> NonInteger;
|
||||
typedef ArrayType & Nested;
|
||||
typedef typename NumTraits<Scalar>::Literal Literal;
|
||||
|
||||
enum {
|
||||
IsComplex = NumTraits<Scalar>::IsComplex,
|
||||
IsInteger = NumTraits<Scalar>::IsInteger,
|
||||
IsSigned = NumTraits<Scalar>::IsSigned,
|
||||
RequireInitialization = 1,
|
||||
ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::ReadCost,
|
||||
AddCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::AddCost,
|
||||
MulCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::MulCost
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); }
|
||||
|
||||
static inline int digits10() { return NumTraits<Scalar>::digits10(); }
|
||||
};
|
||||
|
||||
template<> struct NumTraits<std::string>
|
||||
: GenericNumTraits<std::string>
|
||||
{
|
||||
enum {
|
||||
RequireInitialization = 1,
|
||||
ReadCost = HugeCost,
|
||||
AddCost = HugeCost,
|
||||
MulCost = HugeCost
|
||||
};
|
||||
|
||||
static inline int digits10() { return 0; }
|
||||
|
||||
private:
|
||||
static inline std::string epsilon();
|
||||
static inline std::string dummy_precision();
|
||||
static inline std::string lowest();
|
||||
static inline std::string highest();
|
||||
static inline std::string infinity();
|
||||
static inline std::string quiet_NaN();
|
||||
};
|
||||
|
||||
// Empty specialization for void to allow template specialization based on NumTraits<T>::Real with T==void and SFINAE.
|
||||
template<> struct NumTraits<void> {};
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_NUMTRAITS_H
|
||||
633
external/include/eigen3/Eigen/src/Core/PermutationMatrix.h
vendored
Normal file
633
external/include/eigen3/Eigen/src/Core/PermutationMatrix.h
vendored
Normal file
@@ -0,0 +1,633 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2009-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_PERMUTATIONMATRIX_H
|
||||
#define EIGEN_PERMUTATIONMATRIX_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
enum PermPermProduct_t {PermPermProduct};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \class PermutationBase
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Base class for permutations
|
||||
*
|
||||
* \tparam Derived the derived class
|
||||
*
|
||||
* This class is the base class for all expressions representing a permutation matrix,
|
||||
* internally stored as a vector of integers.
|
||||
* The convention followed here is that if \f$ \sigma \f$ is a permutation, the corresponding permutation matrix
|
||||
* \f$ P_\sigma \f$ is such that if \f$ (e_1,\ldots,e_p) \f$ is the canonical basis, we have:
|
||||
* \f[ P_\sigma(e_i) = e_{\sigma(i)}. \f]
|
||||
* This convention ensures that for any two permutations \f$ \sigma, \tau \f$, we have:
|
||||
* \f[ P_{\sigma\circ\tau} = P_\sigma P_\tau. \f]
|
||||
*
|
||||
* Permutation matrices are square and invertible.
|
||||
*
|
||||
* Notice that in addition to the member functions and operators listed here, there also are non-member
|
||||
* operator* to multiply any kind of permutation object with any kind of matrix expression (MatrixBase)
|
||||
* on either side.
|
||||
*
|
||||
* \sa class PermutationMatrix, class PermutationWrapper
|
||||
*/
|
||||
template<typename Derived>
|
||||
class PermutationBase : public EigenBase<Derived>
|
||||
{
|
||||
typedef internal::traits<Derived> Traits;
|
||||
typedef EigenBase<Derived> Base;
|
||||
public:
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
typedef typename Traits::IndicesType IndicesType;
|
||||
enum {
|
||||
Flags = Traits::Flags,
|
||||
RowsAtCompileTime = Traits::RowsAtCompileTime,
|
||||
ColsAtCompileTime = Traits::ColsAtCompileTime,
|
||||
MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = Traits::MaxColsAtCompileTime
|
||||
};
|
||||
typedef typename Traits::StorageIndex StorageIndex;
|
||||
typedef Matrix<StorageIndex,RowsAtCompileTime,ColsAtCompileTime,0,MaxRowsAtCompileTime,MaxColsAtCompileTime>
|
||||
DenseMatrixType;
|
||||
typedef PermutationMatrix<IndicesType::SizeAtCompileTime,IndicesType::MaxSizeAtCompileTime,StorageIndex>
|
||||
PlainPermutationType;
|
||||
typedef PlainPermutationType PlainObject;
|
||||
using Base::derived;
|
||||
typedef Inverse<Derived> InverseReturnType;
|
||||
typedef void Scalar;
|
||||
#endif
|
||||
|
||||
/** Copies the other permutation into *this */
|
||||
template<typename OtherDerived>
|
||||
Derived& operator=(const PermutationBase<OtherDerived>& other)
|
||||
{
|
||||
indices() = other.indices();
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** Assignment from the Transpositions \a tr */
|
||||
template<typename OtherDerived>
|
||||
Derived& operator=(const TranspositionsBase<OtherDerived>& tr)
|
||||
{
|
||||
setIdentity(tr.size());
|
||||
for(Index k=size()-1; k>=0; --k)
|
||||
applyTranspositionOnTheRight(k,tr.coeff(k));
|
||||
return derived();
|
||||
}
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** This is a special case of the templated operator=. Its purpose is to
|
||||
* prevent a default operator= from hiding the templated operator=.
|
||||
*/
|
||||
Derived& operator=(const PermutationBase& other)
|
||||
{
|
||||
indices() = other.indices();
|
||||
return derived();
|
||||
}
|
||||
#endif
|
||||
|
||||
/** \returns the number of rows */
|
||||
inline Index rows() const { return Index(indices().size()); }
|
||||
|
||||
/** \returns the number of columns */
|
||||
inline Index cols() const { return Index(indices().size()); }
|
||||
|
||||
/** \returns the size of a side of the respective square matrix, i.e., the number of indices */
|
||||
inline Index size() const { return Index(indices().size()); }
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename DenseDerived>
|
||||
void evalTo(MatrixBase<DenseDerived>& other) const
|
||||
{
|
||||
other.setZero();
|
||||
for (Index i=0; i<rows(); ++i)
|
||||
other.coeffRef(indices().coeff(i),i) = typename DenseDerived::Scalar(1);
|
||||
}
|
||||
#endif
|
||||
|
||||
/** \returns a Matrix object initialized from this permutation matrix. Notice that it
|
||||
* is inefficient to return this Matrix object by value. For efficiency, favor using
|
||||
* the Matrix constructor taking EigenBase objects.
|
||||
*/
|
||||
DenseMatrixType toDenseMatrix() const
|
||||
{
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** const version of indices(). */
|
||||
const IndicesType& indices() const { return derived().indices(); }
|
||||
/** \returns a reference to the stored array representing the permutation. */
|
||||
IndicesType& indices() { return derived().indices(); }
|
||||
|
||||
/** Resizes to given size.
|
||||
*/
|
||||
inline void resize(Index newSize)
|
||||
{
|
||||
indices().resize(newSize);
|
||||
}
|
||||
|
||||
/** Sets *this to be the identity permutation matrix */
|
||||
void setIdentity()
|
||||
{
|
||||
StorageIndex n = StorageIndex(size());
|
||||
for(StorageIndex i = 0; i < n; ++i)
|
||||
indices().coeffRef(i) = i;
|
||||
}
|
||||
|
||||
/** Sets *this to be the identity permutation matrix of given size.
|
||||
*/
|
||||
void setIdentity(Index newSize)
|
||||
{
|
||||
resize(newSize);
|
||||
setIdentity();
|
||||
}
|
||||
|
||||
/** Multiplies *this by the transposition \f$(ij)\f$ on the left.
|
||||
*
|
||||
* \returns a reference to *this.
|
||||
*
|
||||
* \warning This is much slower than applyTranspositionOnTheRight(Index,Index):
|
||||
* this has linear complexity and requires a lot of branching.
|
||||
*
|
||||
* \sa applyTranspositionOnTheRight(Index,Index)
|
||||
*/
|
||||
Derived& applyTranspositionOnTheLeft(Index i, Index j)
|
||||
{
|
||||
eigen_assert(i>=0 && j>=0 && i<size() && j<size());
|
||||
for(Index k = 0; k < size(); ++k)
|
||||
{
|
||||
if(indices().coeff(k) == i) indices().coeffRef(k) = StorageIndex(j);
|
||||
else if(indices().coeff(k) == j) indices().coeffRef(k) = StorageIndex(i);
|
||||
}
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** Multiplies *this by the transposition \f$(ij)\f$ on the right.
|
||||
*
|
||||
* \returns a reference to *this.
|
||||
*
|
||||
* This is a fast operation, it only consists in swapping two indices.
|
||||
*
|
||||
* \sa applyTranspositionOnTheLeft(Index,Index)
|
||||
*/
|
||||
Derived& applyTranspositionOnTheRight(Index i, Index j)
|
||||
{
|
||||
eigen_assert(i>=0 && j>=0 && i<size() && j<size());
|
||||
std::swap(indices().coeffRef(i), indices().coeffRef(j));
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** \returns the inverse permutation matrix.
|
||||
*
|
||||
* \note \blank \note_try_to_help_rvo
|
||||
*/
|
||||
inline InverseReturnType inverse() const
|
||||
{ return InverseReturnType(derived()); }
|
||||
/** \returns the tranpose permutation matrix.
|
||||
*
|
||||
* \note \blank \note_try_to_help_rvo
|
||||
*/
|
||||
inline InverseReturnType transpose() const
|
||||
{ return InverseReturnType(derived()); }
|
||||
|
||||
/**** multiplication helpers to hopefully get RVO ****/
|
||||
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
protected:
|
||||
template<typename OtherDerived>
|
||||
void assignTranspose(const PermutationBase<OtherDerived>& other)
|
||||
{
|
||||
for (Index i=0; i<rows();++i) indices().coeffRef(other.indices().coeff(i)) = i;
|
||||
}
|
||||
template<typename Lhs,typename Rhs>
|
||||
void assignProduct(const Lhs& lhs, const Rhs& rhs)
|
||||
{
|
||||
eigen_assert(lhs.cols() == rhs.rows());
|
||||
for (Index i=0; i<rows();++i) indices().coeffRef(i) = lhs.indices().coeff(rhs.indices().coeff(i));
|
||||
}
|
||||
#endif
|
||||
|
||||
public:
|
||||
|
||||
/** \returns the product permutation matrix.
|
||||
*
|
||||
* \note \blank \note_try_to_help_rvo
|
||||
*/
|
||||
template<typename Other>
|
||||
inline PlainPermutationType operator*(const PermutationBase<Other>& other) const
|
||||
{ return PlainPermutationType(internal::PermPermProduct, derived(), other.derived()); }
|
||||
|
||||
/** \returns the product of a permutation with another inverse permutation.
|
||||
*
|
||||
* \note \blank \note_try_to_help_rvo
|
||||
*/
|
||||
template<typename Other>
|
||||
inline PlainPermutationType operator*(const InverseImpl<Other,PermutationStorage>& other) const
|
||||
{ return PlainPermutationType(internal::PermPermProduct, *this, other.eval()); }
|
||||
|
||||
/** \returns the product of an inverse permutation with another permutation.
|
||||
*
|
||||
* \note \blank \note_try_to_help_rvo
|
||||
*/
|
||||
template<typename Other> friend
|
||||
inline PlainPermutationType operator*(const InverseImpl<Other, PermutationStorage>& other, const PermutationBase& perm)
|
||||
{ return PlainPermutationType(internal::PermPermProduct, other.eval(), perm); }
|
||||
|
||||
/** \returns the determinant of the permutation matrix, which is either 1 or -1 depending on the parity of the permutation.
|
||||
*
|
||||
* This function is O(\c n) procedure allocating a buffer of \c n booleans.
|
||||
*/
|
||||
Index determinant() const
|
||||
{
|
||||
Index res = 1;
|
||||
Index n = size();
|
||||
Matrix<bool,RowsAtCompileTime,1,0,MaxRowsAtCompileTime> mask(n);
|
||||
mask.fill(false);
|
||||
Index r = 0;
|
||||
while(r < n)
|
||||
{
|
||||
// search for the next seed
|
||||
while(r<n && mask[r]) r++;
|
||||
if(r>=n)
|
||||
break;
|
||||
// we got one, let's follow it until we are back to the seed
|
||||
Index k0 = r++;
|
||||
mask.coeffRef(k0) = true;
|
||||
for(Index k=indices().coeff(k0); k!=k0; k=indices().coeff(k))
|
||||
{
|
||||
mask.coeffRef(k) = true;
|
||||
res = -res;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
|
||||
struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex> >
|
||||
: traits<Matrix<_StorageIndex,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
|
||||
{
|
||||
typedef PermutationStorage StorageKind;
|
||||
typedef Matrix<_StorageIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
|
||||
typedef _StorageIndex StorageIndex;
|
||||
typedef void Scalar;
|
||||
};
|
||||
}
|
||||
|
||||
/** \class PermutationMatrix
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Permutation matrix
|
||||
*
|
||||
* \tparam SizeAtCompileTime the number of rows/cols, or Dynamic
|
||||
* \tparam MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
|
||||
* \tparam _StorageIndex the integer type of the indices
|
||||
*
|
||||
* This class represents a permutation matrix, internally stored as a vector of integers.
|
||||
*
|
||||
* \sa class PermutationBase, class PermutationWrapper, class DiagonalMatrix
|
||||
*/
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
|
||||
class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex> >
|
||||
{
|
||||
typedef PermutationBase<PermutationMatrix> Base;
|
||||
typedef internal::traits<PermutationMatrix> Traits;
|
||||
public:
|
||||
|
||||
typedef const PermutationMatrix& Nested;
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
typedef typename Traits::IndicesType IndicesType;
|
||||
typedef typename Traits::StorageIndex StorageIndex;
|
||||
#endif
|
||||
|
||||
inline PermutationMatrix()
|
||||
{}
|
||||
|
||||
/** Constructs an uninitialized permutation matrix of given size.
|
||||
*/
|
||||
explicit inline PermutationMatrix(Index size) : m_indices(size)
|
||||
{
|
||||
eigen_internal_assert(size <= NumTraits<StorageIndex>::highest());
|
||||
}
|
||||
|
||||
/** Copy constructor. */
|
||||
template<typename OtherDerived>
|
||||
inline PermutationMatrix(const PermutationBase<OtherDerived>& other)
|
||||
: m_indices(other.indices()) {}
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** Standard copy constructor. Defined only to prevent a default copy constructor
|
||||
* from hiding the other templated constructor */
|
||||
inline PermutationMatrix(const PermutationMatrix& other) : m_indices(other.indices()) {}
|
||||
#endif
|
||||
|
||||
/** Generic constructor from expression of the indices. The indices
|
||||
* array has the meaning that the permutations sends each integer i to indices[i].
|
||||
*
|
||||
* \warning It is your responsibility to check that the indices array that you passes actually
|
||||
* describes a permutation, i.e., each value between 0 and n-1 occurs exactly once, where n is the
|
||||
* array's size.
|
||||
*/
|
||||
template<typename Other>
|
||||
explicit inline PermutationMatrix(const MatrixBase<Other>& indices) : m_indices(indices)
|
||||
{}
|
||||
|
||||
/** Convert the Transpositions \a tr to a permutation matrix */
|
||||
template<typename Other>
|
||||
explicit PermutationMatrix(const TranspositionsBase<Other>& tr)
|
||||
: m_indices(tr.size())
|
||||
{
|
||||
*this = tr;
|
||||
}
|
||||
|
||||
/** Copies the other permutation into *this */
|
||||
template<typename Other>
|
||||
PermutationMatrix& operator=(const PermutationBase<Other>& other)
|
||||
{
|
||||
m_indices = other.indices();
|
||||
return *this;
|
||||
}
|
||||
|
||||
/** Assignment from the Transpositions \a tr */
|
||||
template<typename Other>
|
||||
PermutationMatrix& operator=(const TranspositionsBase<Other>& tr)
|
||||
{
|
||||
return Base::operator=(tr.derived());
|
||||
}
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** This is a special case of the templated operator=. Its purpose is to
|
||||
* prevent a default operator= from hiding the templated operator=.
|
||||
*/
|
||||
PermutationMatrix& operator=(const PermutationMatrix& other)
|
||||
{
|
||||
m_indices = other.m_indices;
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
|
||||
/** const version of indices(). */
|
||||
const IndicesType& indices() const { return m_indices; }
|
||||
/** \returns a reference to the stored array representing the permutation. */
|
||||
IndicesType& indices() { return m_indices; }
|
||||
|
||||
|
||||
/**** multiplication helpers to hopefully get RVO ****/
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename Other>
|
||||
PermutationMatrix(const InverseImpl<Other,PermutationStorage>& other)
|
||||
: m_indices(other.derived().nestedExpression().size())
|
||||
{
|
||||
eigen_internal_assert(m_indices.size() <= NumTraits<StorageIndex>::highest());
|
||||
StorageIndex end = StorageIndex(m_indices.size());
|
||||
for (StorageIndex i=0; i<end;++i)
|
||||
m_indices.coeffRef(other.derived().nestedExpression().indices().coeff(i)) = i;
|
||||
}
|
||||
template<typename Lhs,typename Rhs>
|
||||
PermutationMatrix(internal::PermPermProduct_t, const Lhs& lhs, const Rhs& rhs)
|
||||
: m_indices(lhs.indices().size())
|
||||
{
|
||||
Base::assignProduct(lhs,rhs);
|
||||
}
|
||||
#endif
|
||||
|
||||
protected:
|
||||
|
||||
IndicesType m_indices;
|
||||
};
|
||||
|
||||
|
||||
namespace internal {
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex, int _PacketAccess>
|
||||
struct traits<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex>,_PacketAccess> >
|
||||
: traits<Matrix<_StorageIndex,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
|
||||
{
|
||||
typedef PermutationStorage StorageKind;
|
||||
typedef Map<const Matrix<_StorageIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1>, _PacketAccess> IndicesType;
|
||||
typedef _StorageIndex StorageIndex;
|
||||
typedef void Scalar;
|
||||
};
|
||||
}
|
||||
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex, int _PacketAccess>
|
||||
class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex>,_PacketAccess>
|
||||
: public PermutationBase<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex>,_PacketAccess> >
|
||||
{
|
||||
typedef PermutationBase<Map> Base;
|
||||
typedef internal::traits<Map> Traits;
|
||||
public:
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
typedef typename Traits::IndicesType IndicesType;
|
||||
typedef typename IndicesType::Scalar StorageIndex;
|
||||
#endif
|
||||
|
||||
inline Map(const StorageIndex* indicesPtr)
|
||||
: m_indices(indicesPtr)
|
||||
{}
|
||||
|
||||
inline Map(const StorageIndex* indicesPtr, Index size)
|
||||
: m_indices(indicesPtr,size)
|
||||
{}
|
||||
|
||||
/** Copies the other permutation into *this */
|
||||
template<typename Other>
|
||||
Map& operator=(const PermutationBase<Other>& other)
|
||||
{ return Base::operator=(other.derived()); }
|
||||
|
||||
/** Assignment from the Transpositions \a tr */
|
||||
template<typename Other>
|
||||
Map& operator=(const TranspositionsBase<Other>& tr)
|
||||
{ return Base::operator=(tr.derived()); }
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** This is a special case of the templated operator=. Its purpose is to
|
||||
* prevent a default operator= from hiding the templated operator=.
|
||||
*/
|
||||
Map& operator=(const Map& other)
|
||||
{
|
||||
m_indices = other.m_indices;
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
|
||||
/** const version of indices(). */
|
||||
const IndicesType& indices() const { return m_indices; }
|
||||
/** \returns a reference to the stored array representing the permutation. */
|
||||
IndicesType& indices() { return m_indices; }
|
||||
|
||||
protected:
|
||||
|
||||
IndicesType m_indices;
|
||||
};
|
||||
|
||||
template<typename _IndicesType> class TranspositionsWrapper;
|
||||
namespace internal {
|
||||
template<typename _IndicesType>
|
||||
struct traits<PermutationWrapper<_IndicesType> >
|
||||
{
|
||||
typedef PermutationStorage StorageKind;
|
||||
typedef void Scalar;
|
||||
typedef typename _IndicesType::Scalar StorageIndex;
|
||||
typedef _IndicesType IndicesType;
|
||||
enum {
|
||||
RowsAtCompileTime = _IndicesType::SizeAtCompileTime,
|
||||
ColsAtCompileTime = _IndicesType::SizeAtCompileTime,
|
||||
MaxRowsAtCompileTime = IndicesType::MaxSizeAtCompileTime,
|
||||
MaxColsAtCompileTime = IndicesType::MaxSizeAtCompileTime,
|
||||
Flags = 0
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
/** \class PermutationWrapper
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Class to view a vector of integers as a permutation matrix
|
||||
*
|
||||
* \tparam _IndicesType the type of the vector of integer (can be any compatible expression)
|
||||
*
|
||||
* This class allows to view any vector expression of integers as a permutation matrix.
|
||||
*
|
||||
* \sa class PermutationBase, class PermutationMatrix
|
||||
*/
|
||||
template<typename _IndicesType>
|
||||
class PermutationWrapper : public PermutationBase<PermutationWrapper<_IndicesType> >
|
||||
{
|
||||
typedef PermutationBase<PermutationWrapper> Base;
|
||||
typedef internal::traits<PermutationWrapper> Traits;
|
||||
public:
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
typedef typename Traits::IndicesType IndicesType;
|
||||
#endif
|
||||
|
||||
inline PermutationWrapper(const IndicesType& indices)
|
||||
: m_indices(indices)
|
||||
{}
|
||||
|
||||
/** const version of indices(). */
|
||||
const typename internal::remove_all<typename IndicesType::Nested>::type&
|
||||
indices() const { return m_indices; }
|
||||
|
||||
protected:
|
||||
|
||||
typename IndicesType::Nested m_indices;
|
||||
};
|
||||
|
||||
|
||||
/** \returns the matrix with the permutation applied to the columns.
|
||||
*/
|
||||
template<typename MatrixDerived, typename PermutationDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Product<MatrixDerived, PermutationDerived, AliasFreeProduct>
|
||||
operator*(const MatrixBase<MatrixDerived> &matrix,
|
||||
const PermutationBase<PermutationDerived>& permutation)
|
||||
{
|
||||
return Product<MatrixDerived, PermutationDerived, AliasFreeProduct>
|
||||
(matrix.derived(), permutation.derived());
|
||||
}
|
||||
|
||||
/** \returns the matrix with the permutation applied to the rows.
|
||||
*/
|
||||
template<typename PermutationDerived, typename MatrixDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Product<PermutationDerived, MatrixDerived, AliasFreeProduct>
|
||||
operator*(const PermutationBase<PermutationDerived> &permutation,
|
||||
const MatrixBase<MatrixDerived>& matrix)
|
||||
{
|
||||
return Product<PermutationDerived, MatrixDerived, AliasFreeProduct>
|
||||
(permutation.derived(), matrix.derived());
|
||||
}
|
||||
|
||||
|
||||
template<typename PermutationType>
|
||||
class InverseImpl<PermutationType, PermutationStorage>
|
||||
: public EigenBase<Inverse<PermutationType> >
|
||||
{
|
||||
typedef typename PermutationType::PlainPermutationType PlainPermutationType;
|
||||
typedef internal::traits<PermutationType> PermTraits;
|
||||
protected:
|
||||
InverseImpl() {}
|
||||
public:
|
||||
typedef Inverse<PermutationType> InverseType;
|
||||
using EigenBase<Inverse<PermutationType> >::derived;
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
typedef typename PermutationType::DenseMatrixType DenseMatrixType;
|
||||
enum {
|
||||
RowsAtCompileTime = PermTraits::RowsAtCompileTime,
|
||||
ColsAtCompileTime = PermTraits::ColsAtCompileTime,
|
||||
MaxRowsAtCompileTime = PermTraits::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = PermTraits::MaxColsAtCompileTime
|
||||
};
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename DenseDerived>
|
||||
void evalTo(MatrixBase<DenseDerived>& other) const
|
||||
{
|
||||
other.setZero();
|
||||
for (Index i=0; i<derived().rows();++i)
|
||||
other.coeffRef(i, derived().nestedExpression().indices().coeff(i)) = typename DenseDerived::Scalar(1);
|
||||
}
|
||||
#endif
|
||||
|
||||
/** \return the equivalent permutation matrix */
|
||||
PlainPermutationType eval() const { return derived(); }
|
||||
|
||||
DenseMatrixType toDenseMatrix() const { return derived(); }
|
||||
|
||||
/** \returns the matrix with the inverse permutation applied to the columns.
|
||||
*/
|
||||
template<typename OtherDerived> friend
|
||||
const Product<OtherDerived, InverseType, AliasFreeProduct>
|
||||
operator*(const MatrixBase<OtherDerived>& matrix, const InverseType& trPerm)
|
||||
{
|
||||
return Product<OtherDerived, InverseType, AliasFreeProduct>(matrix.derived(), trPerm.derived());
|
||||
}
|
||||
|
||||
/** \returns the matrix with the inverse permutation applied to the rows.
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
const Product<InverseType, OtherDerived, AliasFreeProduct>
|
||||
operator*(const MatrixBase<OtherDerived>& matrix) const
|
||||
{
|
||||
return Product<InverseType, OtherDerived, AliasFreeProduct>(derived(), matrix.derived());
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived>
|
||||
const PermutationWrapper<const Derived> MatrixBase<Derived>::asPermutation() const
|
||||
{
|
||||
return derived();
|
||||
}
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<> struct AssignmentKind<DenseShape,PermutationShape> { typedef EigenBase2EigenBase Kind; };
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_PERMUTATIONMATRIX_H
|
||||
1035
external/include/eigen3/Eigen/src/Core/PlainObjectBase.h
vendored
Normal file
1035
external/include/eigen3/Eigen/src/Core/PlainObjectBase.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
186
external/include/eigen3/Eigen/src/Core/Product.h
vendored
Normal file
186
external/include/eigen3/Eigen/src/Core/Product.h
vendored
Normal file
@@ -0,0 +1,186 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_PRODUCT_H
|
||||
#define EIGEN_PRODUCT_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
template<typename Lhs, typename Rhs, int Option, typename StorageKind> class ProductImpl;
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Lhs, typename Rhs, int Option>
|
||||
struct traits<Product<Lhs, Rhs, Option> >
|
||||
{
|
||||
typedef typename remove_all<Lhs>::type LhsCleaned;
|
||||
typedef typename remove_all<Rhs>::type RhsCleaned;
|
||||
typedef traits<LhsCleaned> LhsTraits;
|
||||
typedef traits<RhsCleaned> RhsTraits;
|
||||
|
||||
typedef MatrixXpr XprKind;
|
||||
|
||||
typedef typename ScalarBinaryOpTraits<typename traits<LhsCleaned>::Scalar, typename traits<RhsCleaned>::Scalar>::ReturnType Scalar;
|
||||
typedef typename product_promote_storage_type<typename LhsTraits::StorageKind,
|
||||
typename RhsTraits::StorageKind,
|
||||
internal::product_type<Lhs,Rhs>::ret>::ret StorageKind;
|
||||
typedef typename promote_index_type<typename LhsTraits::StorageIndex,
|
||||
typename RhsTraits::StorageIndex>::type StorageIndex;
|
||||
|
||||
enum {
|
||||
RowsAtCompileTime = LhsTraits::RowsAtCompileTime,
|
||||
ColsAtCompileTime = RhsTraits::ColsAtCompileTime,
|
||||
MaxRowsAtCompileTime = LhsTraits::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = RhsTraits::MaxColsAtCompileTime,
|
||||
|
||||
// FIXME: only needed by GeneralMatrixMatrixTriangular
|
||||
InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsTraits::ColsAtCompileTime, RhsTraits::RowsAtCompileTime),
|
||||
|
||||
// The storage order is somewhat arbitrary here. The correct one will be determined through the evaluator.
|
||||
Flags = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? RowMajorBit
|
||||
: (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0
|
||||
: ( ((LhsTraits::Flags&NoPreferredStorageOrderBit) && (RhsTraits::Flags&RowMajorBit))
|
||||
|| ((RhsTraits::Flags&NoPreferredStorageOrderBit) && (LhsTraits::Flags&RowMajorBit)) ) ? RowMajorBit
|
||||
: NoPreferredStorageOrderBit
|
||||
};
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \class Product
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of the product of two arbitrary matrices or vectors
|
||||
*
|
||||
* \tparam _Lhs the type of the left-hand side expression
|
||||
* \tparam _Rhs the type of the right-hand side expression
|
||||
*
|
||||
* This class represents an expression of the product of two arbitrary matrices.
|
||||
*
|
||||
* The other template parameters are:
|
||||
* \tparam Option can be DefaultProduct, AliasFreeProduct, or LazyProduct
|
||||
*
|
||||
*/
|
||||
template<typename _Lhs, typename _Rhs, int Option>
|
||||
class Product : public ProductImpl<_Lhs,_Rhs,Option,
|
||||
typename internal::product_promote_storage_type<typename internal::traits<_Lhs>::StorageKind,
|
||||
typename internal::traits<_Rhs>::StorageKind,
|
||||
internal::product_type<_Lhs,_Rhs>::ret>::ret>
|
||||
{
|
||||
public:
|
||||
|
||||
typedef _Lhs Lhs;
|
||||
typedef _Rhs Rhs;
|
||||
|
||||
typedef typename ProductImpl<
|
||||
Lhs, Rhs, Option,
|
||||
typename internal::product_promote_storage_type<typename internal::traits<Lhs>::StorageKind,
|
||||
typename internal::traits<Rhs>::StorageKind,
|
||||
internal::product_type<Lhs,Rhs>::ret>::ret>::Base Base;
|
||||
EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
|
||||
|
||||
typedef typename internal::ref_selector<Lhs>::type LhsNested;
|
||||
typedef typename internal::ref_selector<Rhs>::type RhsNested;
|
||||
typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
|
||||
typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
|
||||
|
||||
EIGEN_DEVICE_FUNC Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs)
|
||||
{
|
||||
eigen_assert(lhs.cols() == rhs.rows()
|
||||
&& "invalid matrix product"
|
||||
&& "if you wanted a coeff-wise or a dot product use the respective explicit functions");
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC const LhsNestedCleaned& lhs() const { return m_lhs; }
|
||||
EIGEN_DEVICE_FUNC const RhsNestedCleaned& rhs() const { return m_rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
LhsNested m_lhs;
|
||||
RhsNested m_rhs;
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Lhs, typename Rhs, int Option, int ProductTag = internal::product_type<Lhs,Rhs>::ret>
|
||||
class dense_product_base
|
||||
: public internal::dense_xpr_base<Product<Lhs,Rhs,Option> >::type
|
||||
{};
|
||||
|
||||
/** Convertion to scalar for inner-products */
|
||||
template<typename Lhs, typename Rhs, int Option>
|
||||
class dense_product_base<Lhs, Rhs, Option, InnerProduct>
|
||||
: public internal::dense_xpr_base<Product<Lhs,Rhs,Option> >::type
|
||||
{
|
||||
typedef Product<Lhs,Rhs,Option> ProductXpr;
|
||||
typedef typename internal::dense_xpr_base<ProductXpr>::type Base;
|
||||
public:
|
||||
using Base::derived;
|
||||
typedef typename Base::Scalar Scalar;
|
||||
|
||||
EIGEN_STRONG_INLINE operator const Scalar() const
|
||||
{
|
||||
return internal::evaluator<ProductXpr>(derived()).coeff(0,0);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
|
||||
// Generic API dispatcher
|
||||
template<typename Lhs, typename Rhs, int Option, typename StorageKind>
|
||||
class ProductImpl : public internal::generic_xpr_base<Product<Lhs,Rhs,Option>, MatrixXpr, StorageKind>::type
|
||||
{
|
||||
public:
|
||||
typedef typename internal::generic_xpr_base<Product<Lhs,Rhs,Option>, MatrixXpr, StorageKind>::type Base;
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, int Option>
|
||||
class ProductImpl<Lhs,Rhs,Option,Dense>
|
||||
: public internal::dense_product_base<Lhs,Rhs,Option>
|
||||
{
|
||||
typedef Product<Lhs, Rhs, Option> Derived;
|
||||
|
||||
public:
|
||||
|
||||
typedef typename internal::dense_product_base<Lhs, Rhs, Option> Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
|
||||
protected:
|
||||
enum {
|
||||
IsOneByOne = (RowsAtCompileTime == 1 || RowsAtCompileTime == Dynamic) &&
|
||||
(ColsAtCompileTime == 1 || ColsAtCompileTime == Dynamic),
|
||||
EnableCoeff = IsOneByOne || Option==LazyProduct
|
||||
};
|
||||
|
||||
public:
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index row, Index col) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
|
||||
eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
|
||||
|
||||
return internal::evaluator<Derived>(derived()).coeff(row,col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index i) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
|
||||
eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
|
||||
|
||||
return internal::evaluator<Derived>(derived()).coeff(i);
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_PRODUCT_H
|
||||
1112
external/include/eigen3/Eigen/src/Core/ProductEvaluators.h
vendored
Normal file
1112
external/include/eigen3/Eigen/src/Core/ProductEvaluators.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
182
external/include/eigen3/Eigen/src/Core/Random.h
vendored
Normal file
182
external/include/eigen3/Eigen/src/Core/Random.h
vendored
Normal file
@@ -0,0 +1,182 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_RANDOM_H
|
||||
#define EIGEN_RANDOM_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Scalar> struct scalar_random_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_random_op)
|
||||
inline const Scalar operator() () const { return random<Scalar>(); }
|
||||
};
|
||||
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_random_op<Scalar> >
|
||||
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false, IsRepeatable = false }; };
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \returns a random matrix expression
|
||||
*
|
||||
* Numbers are uniformly spread through their whole definition range for integer types,
|
||||
* and in the [-1:1] range for floating point scalar types.
|
||||
*
|
||||
* The parameters \a rows and \a cols are the number of rows and of columns of
|
||||
* the returned matrix. Must be compatible with this MatrixBase type.
|
||||
*
|
||||
* \not_reentrant
|
||||
*
|
||||
* This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
|
||||
* it is redundant to pass \a rows and \a cols as arguments, so Random() should be used
|
||||
* instead.
|
||||
*
|
||||
*
|
||||
* Example: \include MatrixBase_random_int_int.cpp
|
||||
* Output: \verbinclude MatrixBase_random_int_int.out
|
||||
*
|
||||
* This expression has the "evaluate before nesting" flag so that it will be evaluated into
|
||||
* a temporary matrix whenever it is nested in a larger expression. This prevents unexpected
|
||||
* behavior with expressions involving random matrices.
|
||||
*
|
||||
* See DenseBase::NullaryExpr(Index, const CustomNullaryOp&) for an example using C++11 random generators.
|
||||
*
|
||||
* \sa DenseBase::setRandom(), DenseBase::Random(Index), DenseBase::Random()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const typename DenseBase<Derived>::RandomReturnType
|
||||
DenseBase<Derived>::Random(Index rows, Index cols)
|
||||
{
|
||||
return NullaryExpr(rows, cols, internal::scalar_random_op<Scalar>());
|
||||
}
|
||||
|
||||
/** \returns a random vector expression
|
||||
*
|
||||
* Numbers are uniformly spread through their whole definition range for integer types,
|
||||
* and in the [-1:1] range for floating point scalar types.
|
||||
*
|
||||
* The parameter \a size is the size of the returned vector.
|
||||
* Must be compatible with this MatrixBase type.
|
||||
*
|
||||
* \only_for_vectors
|
||||
* \not_reentrant
|
||||
*
|
||||
* This variant is meant to be used for dynamic-size vector types. For fixed-size types,
|
||||
* it is redundant to pass \a size as argument, so Random() should be used
|
||||
* instead.
|
||||
*
|
||||
* Example: \include MatrixBase_random_int.cpp
|
||||
* Output: \verbinclude MatrixBase_random_int.out
|
||||
*
|
||||
* This expression has the "evaluate before nesting" flag so that it will be evaluated into
|
||||
* a temporary vector whenever it is nested in a larger expression. This prevents unexpected
|
||||
* behavior with expressions involving random matrices.
|
||||
*
|
||||
* \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const typename DenseBase<Derived>::RandomReturnType
|
||||
DenseBase<Derived>::Random(Index size)
|
||||
{
|
||||
return NullaryExpr(size, internal::scalar_random_op<Scalar>());
|
||||
}
|
||||
|
||||
/** \returns a fixed-size random matrix or vector expression
|
||||
*
|
||||
* Numbers are uniformly spread through their whole definition range for integer types,
|
||||
* and in the [-1:1] range for floating point scalar types.
|
||||
*
|
||||
* This variant is only for fixed-size MatrixBase types. For dynamic-size types, you
|
||||
* need to use the variants taking size arguments.
|
||||
*
|
||||
* Example: \include MatrixBase_random.cpp
|
||||
* Output: \verbinclude MatrixBase_random.out
|
||||
*
|
||||
* This expression has the "evaluate before nesting" flag so that it will be evaluated into
|
||||
* a temporary matrix whenever it is nested in a larger expression. This prevents unexpected
|
||||
* behavior with expressions involving random matrices.
|
||||
*
|
||||
* \not_reentrant
|
||||
*
|
||||
* \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random(Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const typename DenseBase<Derived>::RandomReturnType
|
||||
DenseBase<Derived>::Random()
|
||||
{
|
||||
return NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_random_op<Scalar>());
|
||||
}
|
||||
|
||||
/** Sets all coefficients in this expression to random values.
|
||||
*
|
||||
* Numbers are uniformly spread through their whole definition range for integer types,
|
||||
* and in the [-1:1] range for floating point scalar types.
|
||||
*
|
||||
* \not_reentrant
|
||||
*
|
||||
* Example: \include MatrixBase_setRandom.cpp
|
||||
* Output: \verbinclude MatrixBase_setRandom.out
|
||||
*
|
||||
* \sa class CwiseNullaryOp, setRandom(Index), setRandom(Index,Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline Derived& DenseBase<Derived>::setRandom()
|
||||
{
|
||||
return *this = Random(rows(), cols());
|
||||
}
|
||||
|
||||
/** Resizes to the given \a newSize, and sets all coefficients in this expression to random values.
|
||||
*
|
||||
* Numbers are uniformly spread through their whole definition range for integer types,
|
||||
* and in the [-1:1] range for floating point scalar types.
|
||||
*
|
||||
* \only_for_vectors
|
||||
* \not_reentrant
|
||||
*
|
||||
* Example: \include Matrix_setRandom_int.cpp
|
||||
* Output: \verbinclude Matrix_setRandom_int.out
|
||||
*
|
||||
* \sa DenseBase::setRandom(), setRandom(Index,Index), class CwiseNullaryOp, DenseBase::Random()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived&
|
||||
PlainObjectBase<Derived>::setRandom(Index newSize)
|
||||
{
|
||||
resize(newSize);
|
||||
return setRandom();
|
||||
}
|
||||
|
||||
/** Resizes to the given size, and sets all coefficients in this expression to random values.
|
||||
*
|
||||
* Numbers are uniformly spread through their whole definition range for integer types,
|
||||
* and in the [-1:1] range for floating point scalar types.
|
||||
*
|
||||
* \not_reentrant
|
||||
*
|
||||
* \param rows the new number of rows
|
||||
* \param cols the new number of columns
|
||||
*
|
||||
* Example: \include Matrix_setRandom_int_int.cpp
|
||||
* Output: \verbinclude Matrix_setRandom_int_int.out
|
||||
*
|
||||
* \sa DenseBase::setRandom(), setRandom(Index), class CwiseNullaryOp, DenseBase::Random()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived&
|
||||
PlainObjectBase<Derived>::setRandom(Index rows, Index cols)
|
||||
{
|
||||
resize(rows, cols);
|
||||
return setRandom();
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_RANDOM_H
|
||||
505
external/include/eigen3/Eigen/src/Core/Redux.h
vendored
Normal file
505
external/include/eigen3/Eigen/src/Core/Redux.h
vendored
Normal file
@@ -0,0 +1,505 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_REDUX_H
|
||||
#define EIGEN_REDUX_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
// TODO
|
||||
// * implement other kind of vectorization
|
||||
// * factorize code
|
||||
|
||||
/***************************************************************************
|
||||
* Part 1 : the logic deciding a strategy for vectorization and unrolling
|
||||
***************************************************************************/
|
||||
|
||||
template<typename Func, typename Derived>
|
||||
struct redux_traits
|
||||
{
|
||||
public:
|
||||
typedef typename find_best_packet<typename Derived::Scalar,Derived::SizeAtCompileTime>::type PacketType;
|
||||
enum {
|
||||
PacketSize = unpacket_traits<PacketType>::size,
|
||||
InnerMaxSize = int(Derived::IsRowMajor)
|
||||
? Derived::MaxColsAtCompileTime
|
||||
: Derived::MaxRowsAtCompileTime
|
||||
};
|
||||
|
||||
enum {
|
||||
MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
|
||||
&& (functor_traits<Func>::PacketAccess),
|
||||
MayLinearVectorize = bool(MightVectorize) && (int(Derived::Flags)&LinearAccessBit),
|
||||
MaySliceVectorize = bool(MightVectorize) && int(InnerMaxSize)>=3*PacketSize
|
||||
};
|
||||
|
||||
public:
|
||||
enum {
|
||||
Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
|
||||
: int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
|
||||
: int(DefaultTraversal)
|
||||
};
|
||||
|
||||
public:
|
||||
enum {
|
||||
Cost = Derived::SizeAtCompileTime == Dynamic ? HugeCost
|
||||
: Derived::SizeAtCompileTime * Derived::CoeffReadCost + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
|
||||
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
|
||||
};
|
||||
|
||||
public:
|
||||
enum {
|
||||
Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling
|
||||
};
|
||||
|
||||
#ifdef EIGEN_DEBUG_ASSIGN
|
||||
static void debug()
|
||||
{
|
||||
std::cerr << "Xpr: " << typeid(typename Derived::XprType).name() << std::endl;
|
||||
std::cerr.setf(std::ios::hex, std::ios::basefield);
|
||||
EIGEN_DEBUG_VAR(Derived::Flags)
|
||||
std::cerr.unsetf(std::ios::hex);
|
||||
EIGEN_DEBUG_VAR(InnerMaxSize)
|
||||
EIGEN_DEBUG_VAR(PacketSize)
|
||||
EIGEN_DEBUG_VAR(MightVectorize)
|
||||
EIGEN_DEBUG_VAR(MayLinearVectorize)
|
||||
EIGEN_DEBUG_VAR(MaySliceVectorize)
|
||||
EIGEN_DEBUG_VAR(Traversal)
|
||||
EIGEN_DEBUG_VAR(UnrollingLimit)
|
||||
EIGEN_DEBUG_VAR(Unrolling)
|
||||
std::cerr << std::endl;
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
* Part 2 : unrollers
|
||||
***************************************************************************/
|
||||
|
||||
/*** no vectorization ***/
|
||||
|
||||
template<typename Func, typename Derived, int Start, int Length>
|
||||
struct redux_novec_unroller
|
||||
{
|
||||
enum {
|
||||
HalfLength = Length/2
|
||||
};
|
||||
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
|
||||
{
|
||||
return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
|
||||
redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Func, typename Derived, int Start>
|
||||
struct redux_novec_unroller<Func, Derived, Start, 1>
|
||||
{
|
||||
enum {
|
||||
outer = Start / Derived::InnerSizeAtCompileTime,
|
||||
inner = Start % Derived::InnerSizeAtCompileTime
|
||||
};
|
||||
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
|
||||
{
|
||||
return mat.coeffByOuterInner(outer, inner);
|
||||
}
|
||||
};
|
||||
|
||||
// This is actually dead code and will never be called. It is required
|
||||
// to prevent false warnings regarding failed inlining though
|
||||
// for 0 length run() will never be called at all.
|
||||
template<typename Func, typename Derived, int Start>
|
||||
struct redux_novec_unroller<Func, Derived, Start, 0>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
|
||||
};
|
||||
|
||||
/*** vectorization ***/
|
||||
|
||||
template<typename Func, typename Derived, int Start, int Length>
|
||||
struct redux_vec_unroller
|
||||
{
|
||||
enum {
|
||||
PacketSize = redux_traits<Func, Derived>::PacketSize,
|
||||
HalfLength = Length/2
|
||||
};
|
||||
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
|
||||
|
||||
static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
|
||||
{
|
||||
return func.packetOp(
|
||||
redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
|
||||
redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) );
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Func, typename Derived, int Start>
|
||||
struct redux_vec_unroller<Func, Derived, Start, 1>
|
||||
{
|
||||
enum {
|
||||
index = Start * redux_traits<Func, Derived>::PacketSize,
|
||||
outer = index / int(Derived::InnerSizeAtCompileTime),
|
||||
inner = index % int(Derived::InnerSizeAtCompileTime),
|
||||
alignment = Derived::Alignment
|
||||
};
|
||||
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
|
||||
|
||||
static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
|
||||
{
|
||||
return mat.template packetByOuterInner<alignment,PacketScalar>(outer, inner);
|
||||
}
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
* Part 3 : implementation of all cases
|
||||
***************************************************************************/
|
||||
|
||||
template<typename Func, typename Derived,
|
||||
int Traversal = redux_traits<Func, Derived>::Traversal,
|
||||
int Unrolling = redux_traits<Func, Derived>::Unrolling
|
||||
>
|
||||
struct redux_impl;
|
||||
|
||||
template<typename Func, typename Derived>
|
||||
struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
|
||||
{
|
||||
eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
|
||||
Scalar res;
|
||||
res = mat.coeffByOuterInner(0, 0);
|
||||
for(Index i = 1; i < mat.innerSize(); ++i)
|
||||
res = func(res, mat.coeffByOuterInner(0, i));
|
||||
for(Index i = 1; i < mat.outerSize(); ++i)
|
||||
for(Index j = 0; j < mat.innerSize(); ++j)
|
||||
res = func(res, mat.coeffByOuterInner(i, j));
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Func, typename Derived>
|
||||
struct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling>
|
||||
: public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
|
||||
{};
|
||||
|
||||
template<typename Func, typename Derived>
|
||||
struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
|
||||
|
||||
static Scalar run(const Derived &mat, const Func& func)
|
||||
{
|
||||
const Index size = mat.size();
|
||||
|
||||
const Index packetSize = redux_traits<Func, Derived>::PacketSize;
|
||||
const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
|
||||
enum {
|
||||
alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
|
||||
alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment)
|
||||
};
|
||||
const Index alignedStart = internal::first_default_aligned(mat.nestedExpression());
|
||||
const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
|
||||
const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
|
||||
const Index alignedEnd2 = alignedStart + alignedSize2;
|
||||
const Index alignedEnd = alignedStart + alignedSize;
|
||||
Scalar res;
|
||||
if(alignedSize)
|
||||
{
|
||||
PacketScalar packet_res0 = mat.template packet<alignment,PacketScalar>(alignedStart);
|
||||
if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
|
||||
{
|
||||
PacketScalar packet_res1 = mat.template packet<alignment,PacketScalar>(alignedStart+packetSize);
|
||||
for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
|
||||
{
|
||||
packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(index));
|
||||
packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment,PacketScalar>(index+packetSize));
|
||||
}
|
||||
|
||||
packet_res0 = func.packetOp(packet_res0,packet_res1);
|
||||
if(alignedEnd>alignedEnd2)
|
||||
packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(alignedEnd2));
|
||||
}
|
||||
res = func.predux(packet_res0);
|
||||
|
||||
for(Index index = 0; index < alignedStart; ++index)
|
||||
res = func(res,mat.coeff(index));
|
||||
|
||||
for(Index index = alignedEnd; index < size; ++index)
|
||||
res = func(res,mat.coeff(index));
|
||||
}
|
||||
else // too small to vectorize anything.
|
||||
// since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
|
||||
{
|
||||
res = mat.coeff(0);
|
||||
for(Index index = 1; index < size; ++index)
|
||||
res = func(res,mat.coeff(index));
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
// NOTE: for SliceVectorizedTraversal we simply bypass unrolling
|
||||
template<typename Func, typename Derived, int Unrolling>
|
||||
struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
typedef typename redux_traits<Func, Derived>::PacketType PacketType;
|
||||
|
||||
EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func)
|
||||
{
|
||||
eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
|
||||
const Index innerSize = mat.innerSize();
|
||||
const Index outerSize = mat.outerSize();
|
||||
enum {
|
||||
packetSize = redux_traits<Func, Derived>::PacketSize
|
||||
};
|
||||
const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
|
||||
Scalar res;
|
||||
if(packetedInnerSize)
|
||||
{
|
||||
PacketType packet_res = mat.template packet<Unaligned,PacketType>(0,0);
|
||||
for(Index j=0; j<outerSize; ++j)
|
||||
for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
|
||||
packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned,PacketType>(j,i));
|
||||
|
||||
res = func.predux(packet_res);
|
||||
for(Index j=0; j<outerSize; ++j)
|
||||
for(Index i=packetedInnerSize; i<innerSize; ++i)
|
||||
res = func(res, mat.coeffByOuterInner(j,i));
|
||||
}
|
||||
else // too small to vectorize anything.
|
||||
// since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
|
||||
{
|
||||
res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Func, typename Derived>
|
||||
struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
|
||||
typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
|
||||
enum {
|
||||
PacketSize = redux_traits<Func, Derived>::PacketSize,
|
||||
Size = Derived::SizeAtCompileTime,
|
||||
VectorizedSize = (Size / PacketSize) * PacketSize
|
||||
};
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
|
||||
{
|
||||
eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
|
||||
if (VectorizedSize > 0) {
|
||||
Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
|
||||
if (VectorizedSize != Size)
|
||||
res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
|
||||
return res;
|
||||
}
|
||||
else {
|
||||
return redux_novec_unroller<Func, Derived, 0, Size>::run(mat,func);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// evaluator adaptor
|
||||
template<typename _XprType>
|
||||
class redux_evaluator
|
||||
{
|
||||
public:
|
||||
typedef _XprType XprType;
|
||||
EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {}
|
||||
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename XprType::PacketScalar PacketScalar;
|
||||
typedef typename XprType::PacketReturnType PacketReturnType;
|
||||
|
||||
enum {
|
||||
MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
|
||||
// TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator
|
||||
Flags = evaluator<XprType>::Flags & ~DirectAccessBit,
|
||||
IsRowMajor = XprType::IsRowMajor,
|
||||
SizeAtCompileTime = XprType::SizeAtCompileTime,
|
||||
InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime,
|
||||
CoeffReadCost = evaluator<XprType>::CoeffReadCost,
|
||||
Alignment = evaluator<XprType>::Alignment
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
|
||||
EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
|
||||
EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); }
|
||||
EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); }
|
||||
EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
CoeffReturnType coeff(Index row, Index col) const
|
||||
{ return m_evaluator.coeff(row, col); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
CoeffReturnType coeff(Index index) const
|
||||
{ return m_evaluator.coeff(index); }
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index row, Index col) const
|
||||
{ return m_evaluator.template packet<LoadMode,PacketType>(row, col); }
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packet(Index index) const
|
||||
{ return m_evaluator.template packet<LoadMode,PacketType>(index); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
|
||||
{ return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
|
||||
|
||||
template<int LoadMode, typename PacketType>
|
||||
PacketType packetByOuterInner(Index outer, Index inner) const
|
||||
{ return m_evaluator.template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
|
||||
|
||||
const XprType & nestedExpression() const { return m_xpr; }
|
||||
|
||||
protected:
|
||||
internal::evaluator<XprType> m_evaluator;
|
||||
const XprType &m_xpr;
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/***************************************************************************
|
||||
* Part 4 : public API
|
||||
***************************************************************************/
|
||||
|
||||
|
||||
/** \returns the result of a full redux operation on the whole matrix or vector using \a func
|
||||
*
|
||||
* The template parameter \a BinaryOp is the type of the functor \a func which must be
|
||||
* an associative operator. Both current C++98 and C++11 functor styles are handled.
|
||||
*
|
||||
* \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise()
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename Func>
|
||||
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
||||
DenseBase<Derived>::redux(const Func& func) const
|
||||
{
|
||||
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
|
||||
|
||||
typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
|
||||
ThisEvaluator thisEval(derived());
|
||||
|
||||
return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func);
|
||||
}
|
||||
|
||||
/** \returns the minimum of all coefficients of \c *this.
|
||||
* \warning the result is undefined if \c *this contains NaN.
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
||||
DenseBase<Derived>::minCoeff() const
|
||||
{
|
||||
return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar>());
|
||||
}
|
||||
|
||||
/** \returns the maximum of all coefficients of \c *this.
|
||||
* \warning the result is undefined if \c *this contains NaN.
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
||||
DenseBase<Derived>::maxCoeff() const
|
||||
{
|
||||
return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar>());
|
||||
}
|
||||
|
||||
/** \returns the sum of all coefficients of \c *this
|
||||
*
|
||||
* If \c *this is empty, then the value 0 is returned.
|
||||
*
|
||||
* \sa trace(), prod(), mean()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
||||
DenseBase<Derived>::sum() const
|
||||
{
|
||||
if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
|
||||
return Scalar(0);
|
||||
return derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>());
|
||||
}
|
||||
|
||||
/** \returns the mean of all coefficients of *this
|
||||
*
|
||||
* \sa trace(), prod(), sum()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
||||
DenseBase<Derived>::mean() const
|
||||
{
|
||||
#ifdef __INTEL_COMPILER
|
||||
#pragma warning push
|
||||
#pragma warning ( disable : 2259 )
|
||||
#endif
|
||||
return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>())) / Scalar(this->size());
|
||||
#ifdef __INTEL_COMPILER
|
||||
#pragma warning pop
|
||||
#endif
|
||||
}
|
||||
|
||||
/** \returns the product of all coefficients of *this
|
||||
*
|
||||
* Example: \include MatrixBase_prod.cpp
|
||||
* Output: \verbinclude MatrixBase_prod.out
|
||||
*
|
||||
* \sa sum(), mean(), trace()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
||||
DenseBase<Derived>::prod() const
|
||||
{
|
||||
if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
|
||||
return Scalar(1);
|
||||
return derived().redux(Eigen::internal::scalar_product_op<Scalar>());
|
||||
}
|
||||
|
||||
/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
|
||||
*
|
||||
* \c *this can be any matrix, not necessarily square.
|
||||
*
|
||||
* \sa diagonal(), sum()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
|
||||
MatrixBase<Derived>::trace() const
|
||||
{
|
||||
return derived().diagonal().sum();
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_REDUX_H
|
||||
283
external/include/eigen3/Eigen/src/Core/Ref.h
vendored
Normal file
283
external/include/eigen3/Eigen/src/Core/Ref.h
vendored
Normal file
@@ -0,0 +1,283 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_REF_H
|
||||
#define EIGEN_REF_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename _PlainObjectType, int _Options, typename _StrideType>
|
||||
struct traits<Ref<_PlainObjectType, _Options, _StrideType> >
|
||||
: public traits<Map<_PlainObjectType, _Options, _StrideType> >
|
||||
{
|
||||
typedef _PlainObjectType PlainObjectType;
|
||||
typedef _StrideType StrideType;
|
||||
enum {
|
||||
Options = _Options,
|
||||
Flags = traits<Map<_PlainObjectType, _Options, _StrideType> >::Flags | NestByRefBit,
|
||||
Alignment = traits<Map<_PlainObjectType, _Options, _StrideType> >::Alignment
|
||||
};
|
||||
|
||||
template<typename Derived> struct match {
|
||||
enum {
|
||||
HasDirectAccess = internal::has_direct_access<Derived>::ret,
|
||||
StorageOrderMatch = PlainObjectType::IsVectorAtCompileTime || Derived::IsVectorAtCompileTime || ((PlainObjectType::Flags&RowMajorBit)==(Derived::Flags&RowMajorBit)),
|
||||
InnerStrideMatch = int(StrideType::InnerStrideAtCompileTime)==int(Dynamic)
|
||||
|| int(StrideType::InnerStrideAtCompileTime)==int(Derived::InnerStrideAtCompileTime)
|
||||
|| (int(StrideType::InnerStrideAtCompileTime)==0 && int(Derived::InnerStrideAtCompileTime)==1),
|
||||
OuterStrideMatch = Derived::IsVectorAtCompileTime
|
||||
|| int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime),
|
||||
// NOTE, this indirection of evaluator<Derived>::Alignment is needed
|
||||
// to workaround a very strange bug in MSVC related to the instantiation
|
||||
// of has_*ary_operator in evaluator<CwiseNullaryOp>.
|
||||
// This line is surprisingly very sensitive. For instance, simply adding parenthesis
|
||||
// as "DerivedAlignment = (int(evaluator<Derived>::Alignment))," will make MSVC fail...
|
||||
DerivedAlignment = int(evaluator<Derived>::Alignment),
|
||||
AlignmentMatch = (int(traits<PlainObjectType>::Alignment)==int(Unaligned)) || (DerivedAlignment >= int(Alignment)), // FIXME the first condition is not very clear, it should be replaced by the required alignment
|
||||
ScalarTypeMatch = internal::is_same<typename PlainObjectType::Scalar, typename Derived::Scalar>::value,
|
||||
MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch
|
||||
};
|
||||
typedef typename internal::conditional<MatchAtCompileTime,internal::true_type,internal::false_type>::type type;
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
template<typename Derived>
|
||||
struct traits<RefBase<Derived> > : public traits<Derived> {};
|
||||
|
||||
}
|
||||
|
||||
template<typename Derived> class RefBase
|
||||
: public MapBase<Derived>
|
||||
{
|
||||
typedef typename internal::traits<Derived>::PlainObjectType PlainObjectType;
|
||||
typedef typename internal::traits<Derived>::StrideType StrideType;
|
||||
|
||||
public:
|
||||
|
||||
typedef MapBase<Derived> Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(RefBase)
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Index innerStride() const
|
||||
{
|
||||
return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Index outerStride() const
|
||||
{
|
||||
return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()
|
||||
: IsVectorAtCompileTime ? this->size()
|
||||
: int(Flags)&RowMajorBit ? this->cols()
|
||||
: this->rows();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC RefBase()
|
||||
: Base(0,RowsAtCompileTime==Dynamic?0:RowsAtCompileTime,ColsAtCompileTime==Dynamic?0:ColsAtCompileTime),
|
||||
// Stride<> does not allow default ctor for Dynamic strides, so let' initialize it with dummy values:
|
||||
m_stride(StrideType::OuterStrideAtCompileTime==Dynamic?0:StrideType::OuterStrideAtCompileTime,
|
||||
StrideType::InnerStrideAtCompileTime==Dynamic?0:StrideType::InnerStrideAtCompileTime)
|
||||
{}
|
||||
|
||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(RefBase)
|
||||
|
||||
protected:
|
||||
|
||||
typedef Stride<StrideType::OuterStrideAtCompileTime,StrideType::InnerStrideAtCompileTime> StrideBase;
|
||||
|
||||
template<typename Expression>
|
||||
EIGEN_DEVICE_FUNC void construct(Expression& expr)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(PlainObjectType,Expression);
|
||||
|
||||
if(PlainObjectType::RowsAtCompileTime==1)
|
||||
{
|
||||
eigen_assert(expr.rows()==1 || expr.cols()==1);
|
||||
::new (static_cast<Base*>(this)) Base(expr.data(), 1, expr.size());
|
||||
}
|
||||
else if(PlainObjectType::ColsAtCompileTime==1)
|
||||
{
|
||||
eigen_assert(expr.rows()==1 || expr.cols()==1);
|
||||
::new (static_cast<Base*>(this)) Base(expr.data(), expr.size(), 1);
|
||||
}
|
||||
else
|
||||
::new (static_cast<Base*>(this)) Base(expr.data(), expr.rows(), expr.cols());
|
||||
|
||||
if(Expression::IsVectorAtCompileTime && (!PlainObjectType::IsVectorAtCompileTime) && ((Expression::Flags&RowMajorBit)!=(PlainObjectType::Flags&RowMajorBit)))
|
||||
::new (&m_stride) StrideBase(expr.innerStride(), StrideType::InnerStrideAtCompileTime==0?0:1);
|
||||
else
|
||||
::new (&m_stride) StrideBase(StrideType::OuterStrideAtCompileTime==0?0:expr.outerStride(),
|
||||
StrideType::InnerStrideAtCompileTime==0?0:expr.innerStride());
|
||||
}
|
||||
|
||||
StrideBase m_stride;
|
||||
};
|
||||
|
||||
/** \class Ref
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief A matrix or vector expression mapping an existing expression
|
||||
*
|
||||
* \tparam PlainObjectType the equivalent matrix type of the mapped data
|
||||
* \tparam Options specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned.
|
||||
* The default is \c #Unaligned.
|
||||
* \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1),
|
||||
* but accepts a variable outer stride (leading dimension).
|
||||
* This can be overridden by specifying strides.
|
||||
* The type passed here must be a specialization of the Stride template, see examples below.
|
||||
*
|
||||
* This class provides a way to write non-template functions taking Eigen objects as parameters while limiting the number of copies.
|
||||
* A Ref<> object can represent either a const expression or a l-value:
|
||||
* \code
|
||||
* // in-out argument:
|
||||
* void foo1(Ref<VectorXf> x);
|
||||
*
|
||||
* // read-only const argument:
|
||||
* void foo2(const Ref<const VectorXf>& x);
|
||||
* \endcode
|
||||
*
|
||||
* In the in-out case, the input argument must satisfy the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered.
|
||||
* By default, a Ref<VectorXf> can reference any dense vector expression of float having a contiguous memory layout.
|
||||
* Likewise, a Ref<MatrixXf> can reference any column-major dense matrix expression of float whose column's elements are contiguously stored with
|
||||
* the possibility to have a constant space in-between each column, i.e. the inner stride must be equal to 1, but the outer stride (or leading dimension)
|
||||
* can be greater than the number of rows.
|
||||
*
|
||||
* In the const case, if the input expression does not match the above requirement, then it is evaluated into a temporary before being passed to the function.
|
||||
* Here are some examples:
|
||||
* \code
|
||||
* MatrixXf A;
|
||||
* VectorXf a;
|
||||
* foo1(a.head()); // OK
|
||||
* foo1(A.col()); // OK
|
||||
* foo1(A.row()); // Compilation error because here innerstride!=1
|
||||
* foo2(A.row()); // Compilation error because A.row() is a 1xN object while foo2 is expecting a Nx1 object
|
||||
* foo2(A.row().transpose()); // The row is copied into a contiguous temporary
|
||||
* foo2(2*a); // The expression is evaluated into a temporary
|
||||
* foo2(A.col().segment(2,4)); // No temporary
|
||||
* \endcode
|
||||
*
|
||||
* The range of inputs that can be referenced without temporary can be enlarged using the last two template parameters.
|
||||
* Here is an example accepting an innerstride!=1:
|
||||
* \code
|
||||
* // in-out argument:
|
||||
* void foo3(Ref<VectorXf,0,InnerStride<> > x);
|
||||
* foo3(A.row()); // OK
|
||||
* \endcode
|
||||
* The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involve more
|
||||
* expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overload internally calling a
|
||||
* template function, e.g.:
|
||||
* \code
|
||||
* // in the .h:
|
||||
* void foo(const Ref<MatrixXf>& A);
|
||||
* void foo(const Ref<MatrixXf,0,Stride<> >& A);
|
||||
*
|
||||
* // in the .cpp:
|
||||
* template<typename TypeOfA> void foo_impl(const TypeOfA& A) {
|
||||
* ... // crazy code goes here
|
||||
* }
|
||||
* void foo(const Ref<MatrixXf>& A) { foo_impl(A); }
|
||||
* void foo(const Ref<MatrixXf,0,Stride<> >& A) { foo_impl(A); }
|
||||
* \endcode
|
||||
*
|
||||
*
|
||||
* \sa PlainObjectBase::Map(), \ref TopicStorageOrders
|
||||
*/
|
||||
template<typename PlainObjectType, int Options, typename StrideType> class Ref
|
||||
: public RefBase<Ref<PlainObjectType, Options, StrideType> >
|
||||
{
|
||||
private:
|
||||
typedef internal::traits<Ref> Traits;
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC inline Ref(const PlainObjectBase<Derived>& expr,
|
||||
typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0);
|
||||
public:
|
||||
|
||||
typedef RefBase<Ref> Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Ref)
|
||||
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC inline Ref(PlainObjectBase<Derived>& expr,
|
||||
typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(bool(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);
|
||||
Base::construct(expr.derived());
|
||||
}
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr,
|
||||
typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
|
||||
#else
|
||||
/** Implicit constructor from any dense expression */
|
||||
template<typename Derived>
|
||||
inline Ref(DenseBase<Derived>& expr)
|
||||
#endif
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(bool(internal::is_lvalue<Derived>::value), THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);
|
||||
EIGEN_STATIC_ASSERT(bool(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);
|
||||
EIGEN_STATIC_ASSERT(!Derived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);
|
||||
Base::construct(expr.const_cast_derived());
|
||||
}
|
||||
|
||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Ref)
|
||||
|
||||
};
|
||||
|
||||
// this is the const ref version
|
||||
template<typename TPlainObjectType, int Options, typename StrideType> class Ref<const TPlainObjectType, Options, StrideType>
|
||||
: public RefBase<Ref<const TPlainObjectType, Options, StrideType> >
|
||||
{
|
||||
typedef internal::traits<Ref> Traits;
|
||||
public:
|
||||
|
||||
typedef RefBase<Ref> Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Ref)
|
||||
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr,
|
||||
typename internal::enable_if<bool(Traits::template match<Derived>::ScalarTypeMatch),Derived>::type* = 0)
|
||||
{
|
||||
// std::cout << match_helper<Derived>::HasDirectAccess << "," << match_helper<Derived>::OuterStrideMatch << "," << match_helper<Derived>::InnerStrideMatch << "\n";
|
||||
// std::cout << int(StrideType::OuterStrideAtCompileTime) << " - " << int(Derived::OuterStrideAtCompileTime) << "\n";
|
||||
// std::cout << int(StrideType::InnerStrideAtCompileTime) << " - " << int(Derived::InnerStrideAtCompileTime) << "\n";
|
||||
construct(expr.derived(), typename Traits::template match<Derived>::type());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Ref(const Ref& other) : Base(other) {
|
||||
// copy constructor shall not copy the m_object, to avoid unnecessary malloc and copy
|
||||
}
|
||||
|
||||
template<typename OtherRef>
|
||||
EIGEN_DEVICE_FUNC inline Ref(const RefBase<OtherRef>& other) {
|
||||
construct(other.derived(), typename Traits::template match<OtherRef>::type());
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
template<typename Expression>
|
||||
EIGEN_DEVICE_FUNC void construct(const Expression& expr,internal::true_type)
|
||||
{
|
||||
Base::construct(expr);
|
||||
}
|
||||
|
||||
template<typename Expression>
|
||||
EIGEN_DEVICE_FUNC void construct(const Expression& expr, internal::false_type)
|
||||
{
|
||||
internal::call_assignment_no_alias(m_object,expr,internal::assign_op<Scalar,Scalar>());
|
||||
Base::construct(m_object);
|
||||
}
|
||||
|
||||
protected:
|
||||
TPlainObjectType m_object;
|
||||
};
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_REF_H
|
||||
142
external/include/eigen3/Eigen/src/Core/Replicate.h
vendored
Normal file
142
external/include/eigen3/Eigen/src/Core/Replicate.h
vendored
Normal file
@@ -0,0 +1,142 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_REPLICATE_H
|
||||
#define EIGEN_REPLICATE_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
template<typename MatrixType,int RowFactor,int ColFactor>
|
||||
struct traits<Replicate<MatrixType,RowFactor,ColFactor> >
|
||||
: traits<MatrixType>
|
||||
{
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef typename traits<MatrixType>::StorageKind StorageKind;
|
||||
typedef typename traits<MatrixType>::XprKind XprKind;
|
||||
typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
|
||||
typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
|
||||
enum {
|
||||
RowsAtCompileTime = RowFactor==Dynamic || int(MatrixType::RowsAtCompileTime)==Dynamic
|
||||
? Dynamic
|
||||
: RowFactor * MatrixType::RowsAtCompileTime,
|
||||
ColsAtCompileTime = ColFactor==Dynamic || int(MatrixType::ColsAtCompileTime)==Dynamic
|
||||
? Dynamic
|
||||
: ColFactor * MatrixType::ColsAtCompileTime,
|
||||
//FIXME we don't propagate the max sizes !!!
|
||||
MaxRowsAtCompileTime = RowsAtCompileTime,
|
||||
MaxColsAtCompileTime = ColsAtCompileTime,
|
||||
IsRowMajor = MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1 ? 1
|
||||
: MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1 ? 0
|
||||
: (MatrixType::Flags & RowMajorBit) ? 1 : 0,
|
||||
|
||||
// FIXME enable DirectAccess with negative strides?
|
||||
Flags = IsRowMajor ? RowMajorBit : 0
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* \class Replicate
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of the multiple replication of a matrix or vector
|
||||
*
|
||||
* \tparam MatrixType the type of the object we are replicating
|
||||
* \tparam RowFactor number of repetitions at compile time along the vertical direction, can be Dynamic.
|
||||
* \tparam ColFactor number of repetitions at compile time along the horizontal direction, can be Dynamic.
|
||||
*
|
||||
* This class represents an expression of the multiple replication of a matrix or vector.
|
||||
* It is the return type of DenseBase::replicate() and most of the time
|
||||
* this is the only way it is used.
|
||||
*
|
||||
* \sa DenseBase::replicate()
|
||||
*/
|
||||
template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
|
||||
: public internal::dense_xpr_base< Replicate<MatrixType,RowFactor,ColFactor> >::type
|
||||
{
|
||||
typedef typename internal::traits<Replicate>::MatrixTypeNested MatrixTypeNested;
|
||||
typedef typename internal::traits<Replicate>::_MatrixTypeNested _MatrixTypeNested;
|
||||
public:
|
||||
|
||||
typedef typename internal::dense_xpr_base<Replicate>::type Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Replicate)
|
||||
typedef typename internal::remove_all<MatrixType>::type NestedExpression;
|
||||
|
||||
template<typename OriginalMatrixType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline explicit Replicate(const OriginalMatrixType& matrix)
|
||||
: m_matrix(matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<typename internal::remove_const<MatrixType>::type,OriginalMatrixType>::value),
|
||||
THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
|
||||
eigen_assert(RowFactor!=Dynamic && ColFactor!=Dynamic);
|
||||
}
|
||||
|
||||
template<typename OriginalMatrixType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Replicate(const OriginalMatrixType& matrix, Index rowFactor, Index colFactor)
|
||||
: m_matrix(matrix), m_rowFactor(rowFactor), m_colFactor(colFactor)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<typename internal::remove_const<MatrixType>::type,OriginalMatrixType>::value),
|
||||
THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rows() const { return m_matrix.rows() * m_rowFactor.value(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index cols() const { return m_matrix.cols() * m_colFactor.value(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const _MatrixTypeNested& nestedExpression() const
|
||||
{
|
||||
return m_matrix;
|
||||
}
|
||||
|
||||
protected:
|
||||
MatrixTypeNested m_matrix;
|
||||
const internal::variable_if_dynamic<Index, RowFactor> m_rowFactor;
|
||||
const internal::variable_if_dynamic<Index, ColFactor> m_colFactor;
|
||||
};
|
||||
|
||||
/**
|
||||
* \return an expression of the replication of \c *this
|
||||
*
|
||||
* Example: \include MatrixBase_replicate.cpp
|
||||
* Output: \verbinclude MatrixBase_replicate.out
|
||||
*
|
||||
* \sa VectorwiseOp::replicate(), DenseBase::replicate(Index,Index), class Replicate
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<int RowFactor, int ColFactor>
|
||||
const Replicate<Derived,RowFactor,ColFactor>
|
||||
DenseBase<Derived>::replicate() const
|
||||
{
|
||||
return Replicate<Derived,RowFactor,ColFactor>(derived());
|
||||
}
|
||||
|
||||
/**
|
||||
* \return an expression of the replication of each column (or row) of \c *this
|
||||
*
|
||||
* Example: \include DirectionWise_replicate_int.cpp
|
||||
* Output: \verbinclude DirectionWise_replicate_int.out
|
||||
*
|
||||
* \sa VectorwiseOp::replicate(), DenseBase::replicate(), class Replicate
|
||||
*/
|
||||
template<typename ExpressionType, int Direction>
|
||||
const typename VectorwiseOp<ExpressionType,Direction>::ReplicateReturnType
|
||||
VectorwiseOp<ExpressionType,Direction>::replicate(Index factor) const
|
||||
{
|
||||
return typename VectorwiseOp<ExpressionType,Direction>::ReplicateReturnType
|
||||
(_expression(),Direction==Vertical?factor:1,Direction==Horizontal?factor:1);
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_REPLICATE_H
|
||||
117
external/include/eigen3/Eigen/src/Core/ReturnByValue.h
vendored
Normal file
117
external/include/eigen3/Eigen/src/Core/ReturnByValue.h
vendored
Normal file
@@ -0,0 +1,117 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2009-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_RETURNBYVALUE_H
|
||||
#define EIGEN_RETURNBYVALUE_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Derived>
|
||||
struct traits<ReturnByValue<Derived> >
|
||||
: public traits<typename traits<Derived>::ReturnType>
|
||||
{
|
||||
enum {
|
||||
// We're disabling the DirectAccess because e.g. the constructor of
|
||||
// the Block-with-DirectAccess expression requires to have a coeffRef method.
|
||||
// Also, we don't want to have to implement the stride stuff.
|
||||
Flags = (traits<typename traits<Derived>::ReturnType>::Flags
|
||||
| EvalBeforeNestingBit) & ~DirectAccessBit
|
||||
};
|
||||
};
|
||||
|
||||
/* The ReturnByValue object doesn't even have a coeff() method.
|
||||
* So the only way that nesting it in an expression can work, is by evaluating it into a plain matrix.
|
||||
* So internal::nested always gives the plain return matrix type.
|
||||
*
|
||||
* FIXME: I don't understand why we need this specialization: isn't this taken care of by the EvalBeforeNestingBit ??
|
||||
* Answer: EvalBeforeNestingBit should be deprecated since we have the evaluators
|
||||
*/
|
||||
template<typename Derived,int n,typename PlainObject>
|
||||
struct nested_eval<ReturnByValue<Derived>, n, PlainObject>
|
||||
{
|
||||
typedef typename traits<Derived>::ReturnType type;
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \class ReturnByValue
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
*/
|
||||
template<typename Derived> class ReturnByValue
|
||||
: public internal::dense_xpr_base< ReturnByValue<Derived> >::type, internal::no_assignment_operator
|
||||
{
|
||||
public:
|
||||
typedef typename internal::traits<Derived>::ReturnType ReturnType;
|
||||
|
||||
typedef typename internal::dense_xpr_base<ReturnByValue>::type Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(ReturnByValue)
|
||||
|
||||
template<typename Dest>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void evalTo(Dest& dst) const
|
||||
{ static_cast<const Derived*>(this)->evalTo(dst); }
|
||||
EIGEN_DEVICE_FUNC inline Index rows() const { return static_cast<const Derived*>(this)->rows(); }
|
||||
EIGEN_DEVICE_FUNC inline Index cols() const { return static_cast<const Derived*>(this)->cols(); }
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
#define Unusable YOU_ARE_TRYING_TO_ACCESS_A_SINGLE_COEFFICIENT_IN_A_SPECIAL_EXPRESSION_WHERE_THAT_IS_NOT_ALLOWED_BECAUSE_THAT_WOULD_BE_INEFFICIENT
|
||||
class Unusable{
|
||||
Unusable(const Unusable&) {}
|
||||
Unusable& operator=(const Unusable&) {return *this;}
|
||||
};
|
||||
const Unusable& coeff(Index) const { return *reinterpret_cast<const Unusable*>(this); }
|
||||
const Unusable& coeff(Index,Index) const { return *reinterpret_cast<const Unusable*>(this); }
|
||||
Unusable& coeffRef(Index) { return *reinterpret_cast<Unusable*>(this); }
|
||||
Unusable& coeffRef(Index,Index) { return *reinterpret_cast<Unusable*>(this); }
|
||||
#undef Unusable
|
||||
#endif
|
||||
};
|
||||
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
Derived& DenseBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
|
||||
{
|
||||
other.evalTo(derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
namespace internal {
|
||||
|
||||
// Expression is evaluated in a temporary; default implementation of Assignment is bypassed so that
|
||||
// when a ReturnByValue expression is assigned, the evaluator is not constructed.
|
||||
// TODO: Finalize port to new regime; ReturnByValue should not exist in the expression world
|
||||
|
||||
template<typename Derived>
|
||||
struct evaluator<ReturnByValue<Derived> >
|
||||
: public evaluator<typename internal::traits<Derived>::ReturnType>
|
||||
{
|
||||
typedef ReturnByValue<Derived> XprType;
|
||||
typedef typename internal::traits<Derived>::ReturnType PlainObject;
|
||||
typedef evaluator<PlainObject> Base;
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
|
||||
: m_result(xpr.rows(), xpr.cols())
|
||||
{
|
||||
::new (static_cast<Base*>(this)) Base(m_result);
|
||||
xpr.evalTo(m_result);
|
||||
}
|
||||
|
||||
protected:
|
||||
PlainObject m_result;
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_RETURNBYVALUE_H
|
||||
211
external/include/eigen3/Eigen/src/Core/Reverse.h
vendored
Normal file
211
external/include/eigen3/Eigen/src/Core/Reverse.h
vendored
Normal file
@@ -0,0 +1,211 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2009 Ricard Marxer <email@ricardmarxer.com>
|
||||
// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_REVERSE_H
|
||||
#define EIGEN_REVERSE_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename MatrixType, int Direction>
|
||||
struct traits<Reverse<MatrixType, Direction> >
|
||||
: traits<MatrixType>
|
||||
{
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef typename traits<MatrixType>::StorageKind StorageKind;
|
||||
typedef typename traits<MatrixType>::XprKind XprKind;
|
||||
typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
|
||||
typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
|
||||
enum {
|
||||
RowsAtCompileTime = MatrixType::RowsAtCompileTime,
|
||||
ColsAtCompileTime = MatrixType::ColsAtCompileTime,
|
||||
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
|
||||
Flags = _MatrixTypeNested::Flags & (RowMajorBit | LvalueBit)
|
||||
};
|
||||
};
|
||||
|
||||
template<typename PacketType, bool ReversePacket> struct reverse_packet_cond
|
||||
{
|
||||
static inline PacketType run(const PacketType& x) { return preverse(x); }
|
||||
};
|
||||
|
||||
template<typename PacketType> struct reverse_packet_cond<PacketType,false>
|
||||
{
|
||||
static inline PacketType run(const PacketType& x) { return x; }
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \class Reverse
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of the reverse of a vector or matrix
|
||||
*
|
||||
* \tparam MatrixType the type of the object of which we are taking the reverse
|
||||
* \tparam Direction defines the direction of the reverse operation, can be Vertical, Horizontal, or BothDirections
|
||||
*
|
||||
* This class represents an expression of the reverse of a vector.
|
||||
* It is the return type of MatrixBase::reverse() and VectorwiseOp::reverse()
|
||||
* and most of the time this is the only way it is used.
|
||||
*
|
||||
* \sa MatrixBase::reverse(), VectorwiseOp::reverse()
|
||||
*/
|
||||
template<typename MatrixType, int Direction> class Reverse
|
||||
: public internal::dense_xpr_base< Reverse<MatrixType, Direction> >::type
|
||||
{
|
||||
public:
|
||||
|
||||
typedef typename internal::dense_xpr_base<Reverse>::type Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Reverse)
|
||||
typedef typename internal::remove_all<MatrixType>::type NestedExpression;
|
||||
using Base::IsRowMajor;
|
||||
|
||||
protected:
|
||||
enum {
|
||||
PacketSize = internal::packet_traits<Scalar>::size,
|
||||
IsColMajor = !IsRowMajor,
|
||||
ReverseRow = (Direction == Vertical) || (Direction == BothDirections),
|
||||
ReverseCol = (Direction == Horizontal) || (Direction == BothDirections),
|
||||
OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1,
|
||||
OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1,
|
||||
ReversePacket = (Direction == BothDirections)
|
||||
|| ((Direction == Vertical) && IsColMajor)
|
||||
|| ((Direction == Horizontal) && IsRowMajor)
|
||||
};
|
||||
typedef internal::reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet;
|
||||
public:
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { }
|
||||
|
||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Reverse)
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); }
|
||||
EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Index innerStride() const
|
||||
{
|
||||
return -m_matrix.innerStride();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC const typename internal::remove_all<typename MatrixType::Nested>::type&
|
||||
nestedExpression() const
|
||||
{
|
||||
return m_matrix;
|
||||
}
|
||||
|
||||
protected:
|
||||
typename MatrixType::Nested m_matrix;
|
||||
};
|
||||
|
||||
/** \returns an expression of the reverse of *this.
|
||||
*
|
||||
* Example: \include MatrixBase_reverse.cpp
|
||||
* Output: \verbinclude MatrixBase_reverse.out
|
||||
*
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline typename DenseBase<Derived>::ReverseReturnType
|
||||
DenseBase<Derived>::reverse()
|
||||
{
|
||||
return ReverseReturnType(derived());
|
||||
}
|
||||
|
||||
|
||||
//reverse const overload moved DenseBase.h due to a CUDA compiler bug
|
||||
|
||||
/** This is the "in place" version of reverse: it reverses \c *this.
|
||||
*
|
||||
* In most cases it is probably better to simply use the reversed expression
|
||||
* of a matrix. However, when reversing the matrix data itself is really needed,
|
||||
* then this "in-place" version is probably the right choice because it provides
|
||||
* the following additional benefits:
|
||||
* - less error prone: doing the same operation with .reverse() requires special care:
|
||||
* \code m = m.reverse().eval(); \endcode
|
||||
* - this API enables reverse operations without the need for a temporary
|
||||
* - it allows future optimizations (cache friendliness, etc.)
|
||||
*
|
||||
* \sa VectorwiseOp::reverseInPlace(), reverse() */
|
||||
template<typename Derived>
|
||||
inline void DenseBase<Derived>::reverseInPlace()
|
||||
{
|
||||
if(cols()>rows())
|
||||
{
|
||||
Index half = cols()/2;
|
||||
leftCols(half).swap(rightCols(half).reverse());
|
||||
if((cols()%2)==1)
|
||||
{
|
||||
Index half2 = rows()/2;
|
||||
col(half).head(half2).swap(col(half).tail(half2).reverse());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Index half = rows()/2;
|
||||
topRows(half).swap(bottomRows(half).reverse());
|
||||
if((rows()%2)==1)
|
||||
{
|
||||
Index half2 = cols()/2;
|
||||
row(half).head(half2).swap(row(half).tail(half2).reverse());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<int Direction>
|
||||
struct vectorwise_reverse_inplace_impl;
|
||||
|
||||
template<>
|
||||
struct vectorwise_reverse_inplace_impl<Vertical>
|
||||
{
|
||||
template<typename ExpressionType>
|
||||
static void run(ExpressionType &xpr)
|
||||
{
|
||||
Index half = xpr.rows()/2;
|
||||
xpr.topRows(half).swap(xpr.bottomRows(half).colwise().reverse());
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct vectorwise_reverse_inplace_impl<Horizontal>
|
||||
{
|
||||
template<typename ExpressionType>
|
||||
static void run(ExpressionType &xpr)
|
||||
{
|
||||
Index half = xpr.cols()/2;
|
||||
xpr.leftCols(half).swap(xpr.rightCols(half).rowwise().reverse());
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** This is the "in place" version of VectorwiseOp::reverse: it reverses each column or row of \c *this.
|
||||
*
|
||||
* In most cases it is probably better to simply use the reversed expression
|
||||
* of a matrix. However, when reversing the matrix data itself is really needed,
|
||||
* then this "in-place" version is probably the right choice because it provides
|
||||
* the following additional benefits:
|
||||
* - less error prone: doing the same operation with .reverse() requires special care:
|
||||
* \code m = m.reverse().eval(); \endcode
|
||||
* - this API enables reverse operations without the need for a temporary
|
||||
*
|
||||
* \sa DenseBase::reverseInPlace(), reverse() */
|
||||
template<typename ExpressionType, int Direction>
|
||||
void VectorwiseOp<ExpressionType,Direction>::reverseInPlace()
|
||||
{
|
||||
internal::vectorwise_reverse_inplace_impl<Direction>::run(_expression().const_cast_derived());
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_REVERSE_H
|
||||
162
external/include/eigen3/Eigen/src/Core/Select.h
vendored
Normal file
162
external/include/eigen3/Eigen/src/Core/Select.h
vendored
Normal file
@@ -0,0 +1,162 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_SELECT_H
|
||||
#define EIGEN_SELECT_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class Select
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of a coefficient wise version of the C++ ternary operator ?:
|
||||
*
|
||||
* \param ConditionMatrixType the type of the \em condition expression which must be a boolean matrix
|
||||
* \param ThenMatrixType the type of the \em then expression
|
||||
* \param ElseMatrixType the type of the \em else expression
|
||||
*
|
||||
* This class represents an expression of a coefficient wise version of the C++ ternary operator ?:.
|
||||
* It is the return type of DenseBase::select() and most of the time this is the only way it is used.
|
||||
*
|
||||
* \sa DenseBase::select(const DenseBase<ThenDerived>&, const DenseBase<ElseDerived>&) const
|
||||
*/
|
||||
|
||||
namespace internal {
|
||||
template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType>
|
||||
struct traits<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
|
||||
: traits<ThenMatrixType>
|
||||
{
|
||||
typedef typename traits<ThenMatrixType>::Scalar Scalar;
|
||||
typedef Dense StorageKind;
|
||||
typedef typename traits<ThenMatrixType>::XprKind XprKind;
|
||||
typedef typename ConditionMatrixType::Nested ConditionMatrixNested;
|
||||
typedef typename ThenMatrixType::Nested ThenMatrixNested;
|
||||
typedef typename ElseMatrixType::Nested ElseMatrixNested;
|
||||
enum {
|
||||
RowsAtCompileTime = ConditionMatrixType::RowsAtCompileTime,
|
||||
ColsAtCompileTime = ConditionMatrixType::ColsAtCompileTime,
|
||||
MaxRowsAtCompileTime = ConditionMatrixType::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = ConditionMatrixType::MaxColsAtCompileTime,
|
||||
Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & RowMajorBit
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType>
|
||||
class Select : public internal::dense_xpr_base< Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >::type,
|
||||
internal::no_assignment_operator
|
||||
{
|
||||
public:
|
||||
|
||||
typedef typename internal::dense_xpr_base<Select>::type Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Select)
|
||||
|
||||
inline EIGEN_DEVICE_FUNC
|
||||
Select(const ConditionMatrixType& a_conditionMatrix,
|
||||
const ThenMatrixType& a_thenMatrix,
|
||||
const ElseMatrixType& a_elseMatrix)
|
||||
: m_condition(a_conditionMatrix), m_then(a_thenMatrix), m_else(a_elseMatrix)
|
||||
{
|
||||
eigen_assert(m_condition.rows() == m_then.rows() && m_condition.rows() == m_else.rows());
|
||||
eigen_assert(m_condition.cols() == m_then.cols() && m_condition.cols() == m_else.cols());
|
||||
}
|
||||
|
||||
inline EIGEN_DEVICE_FUNC Index rows() const { return m_condition.rows(); }
|
||||
inline EIGEN_DEVICE_FUNC Index cols() const { return m_condition.cols(); }
|
||||
|
||||
inline EIGEN_DEVICE_FUNC
|
||||
const Scalar coeff(Index i, Index j) const
|
||||
{
|
||||
if (m_condition.coeff(i,j))
|
||||
return m_then.coeff(i,j);
|
||||
else
|
||||
return m_else.coeff(i,j);
|
||||
}
|
||||
|
||||
inline EIGEN_DEVICE_FUNC
|
||||
const Scalar coeff(Index i) const
|
||||
{
|
||||
if (m_condition.coeff(i))
|
||||
return m_then.coeff(i);
|
||||
else
|
||||
return m_else.coeff(i);
|
||||
}
|
||||
|
||||
inline EIGEN_DEVICE_FUNC const ConditionMatrixType& conditionMatrix() const
|
||||
{
|
||||
return m_condition;
|
||||
}
|
||||
|
||||
inline EIGEN_DEVICE_FUNC const ThenMatrixType& thenMatrix() const
|
||||
{
|
||||
return m_then;
|
||||
}
|
||||
|
||||
inline EIGEN_DEVICE_FUNC const ElseMatrixType& elseMatrix() const
|
||||
{
|
||||
return m_else;
|
||||
}
|
||||
|
||||
protected:
|
||||
typename ConditionMatrixType::Nested m_condition;
|
||||
typename ThenMatrixType::Nested m_then;
|
||||
typename ElseMatrixType::Nested m_else;
|
||||
};
|
||||
|
||||
|
||||
/** \returns a matrix where each coefficient (i,j) is equal to \a thenMatrix(i,j)
|
||||
* if \c *this(i,j), and \a elseMatrix(i,j) otherwise.
|
||||
*
|
||||
* Example: \include MatrixBase_select.cpp
|
||||
* Output: \verbinclude MatrixBase_select.out
|
||||
*
|
||||
* \sa class Select
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename ThenDerived,typename ElseDerived>
|
||||
inline const Select<Derived,ThenDerived,ElseDerived>
|
||||
DenseBase<Derived>::select(const DenseBase<ThenDerived>& thenMatrix,
|
||||
const DenseBase<ElseDerived>& elseMatrix) const
|
||||
{
|
||||
return Select<Derived,ThenDerived,ElseDerived>(derived(), thenMatrix.derived(), elseMatrix.derived());
|
||||
}
|
||||
|
||||
/** Version of DenseBase::select(const DenseBase&, const DenseBase&) with
|
||||
* the \em else expression being a scalar value.
|
||||
*
|
||||
* \sa DenseBase::select(const DenseBase<ThenDerived>&, const DenseBase<ElseDerived>&) const, class Select
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename ThenDerived>
|
||||
inline const Select<Derived,ThenDerived, typename ThenDerived::ConstantReturnType>
|
||||
DenseBase<Derived>::select(const DenseBase<ThenDerived>& thenMatrix,
|
||||
const typename ThenDerived::Scalar& elseScalar) const
|
||||
{
|
||||
return Select<Derived,ThenDerived,typename ThenDerived::ConstantReturnType>(
|
||||
derived(), thenMatrix.derived(), ThenDerived::Constant(rows(),cols(),elseScalar));
|
||||
}
|
||||
|
||||
/** Version of DenseBase::select(const DenseBase&, const DenseBase&) with
|
||||
* the \em then expression being a scalar value.
|
||||
*
|
||||
* \sa DenseBase::select(const DenseBase<ThenDerived>&, const DenseBase<ElseDerived>&) const, class Select
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename ElseDerived>
|
||||
inline const Select<Derived, typename ElseDerived::ConstantReturnType, ElseDerived >
|
||||
DenseBase<Derived>::select(const typename ElseDerived::Scalar& thenScalar,
|
||||
const DenseBase<ElseDerived>& elseMatrix) const
|
||||
{
|
||||
return Select<Derived,typename ElseDerived::ConstantReturnType,ElseDerived>(
|
||||
derived(), ElseDerived::Constant(rows(),cols(),thenScalar), elseMatrix.derived());
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_SELECT_H
|
||||
352
external/include/eigen3/Eigen/src/Core/SelfAdjointView.h
vendored
Normal file
352
external/include/eigen3/Eigen/src/Core/SelfAdjointView.h
vendored
Normal file
@@ -0,0 +1,352 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_SELFADJOINTMATRIX_H
|
||||
#define EIGEN_SELFADJOINTMATRIX_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class SelfAdjointView
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
*
|
||||
* \brief Expression of a selfadjoint matrix from a triangular part of a dense matrix
|
||||
*
|
||||
* \param MatrixType the type of the dense matrix storing the coefficients
|
||||
* \param TriangularPart can be either \c #Lower or \c #Upper
|
||||
*
|
||||
* This class is an expression of a sefladjoint matrix from a triangular part of a matrix
|
||||
* with given dense storage of the coefficients. It is the return type of MatrixBase::selfadjointView()
|
||||
* and most of the time this is the only way that it is used.
|
||||
*
|
||||
* \sa class TriangularBase, MatrixBase::selfadjointView()
|
||||
*/
|
||||
|
||||
namespace internal {
|
||||
template<typename MatrixType, unsigned int UpLo>
|
||||
struct traits<SelfAdjointView<MatrixType, UpLo> > : traits<MatrixType>
|
||||
{
|
||||
typedef typename ref_selector<MatrixType>::non_const_type MatrixTypeNested;
|
||||
typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
|
||||
typedef MatrixType ExpressionType;
|
||||
typedef typename MatrixType::PlainObject FullMatrixType;
|
||||
enum {
|
||||
Mode = UpLo | SelfAdjoint,
|
||||
FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
|
||||
Flags = MatrixTypeNestedCleaned::Flags & (HereditaryBits|FlagsLvalueBit)
|
||||
& (~(PacketAccessBit | DirectAccessBit | LinearAccessBit)) // FIXME these flags should be preserved
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
|
||||
: public TriangularBase<SelfAdjointView<_MatrixType, UpLo> >
|
||||
{
|
||||
public:
|
||||
|
||||
typedef _MatrixType MatrixType;
|
||||
typedef TriangularBase<SelfAdjointView> Base;
|
||||
typedef typename internal::traits<SelfAdjointView>::MatrixTypeNested MatrixTypeNested;
|
||||
typedef typename internal::traits<SelfAdjointView>::MatrixTypeNestedCleaned MatrixTypeNestedCleaned;
|
||||
typedef MatrixTypeNestedCleaned NestedExpression;
|
||||
|
||||
/** \brief The type of coefficients in this matrix */
|
||||
typedef typename internal::traits<SelfAdjointView>::Scalar Scalar;
|
||||
typedef typename MatrixType::StorageIndex StorageIndex;
|
||||
typedef typename internal::remove_all<typename MatrixType::ConjugateReturnType>::type MatrixConjugateReturnType;
|
||||
|
||||
enum {
|
||||
Mode = internal::traits<SelfAdjointView>::Mode,
|
||||
Flags = internal::traits<SelfAdjointView>::Flags,
|
||||
TransposeMode = ((Mode & Upper) ? Lower : 0) | ((Mode & Lower) ? Upper : 0)
|
||||
};
|
||||
typedef typename MatrixType::PlainObject PlainObject;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(UpLo==Lower || UpLo==Upper,SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rows() const { return m_matrix.rows(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index cols() const { return m_matrix.cols(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const { return m_matrix.outerStride(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const { return m_matrix.innerStride(); }
|
||||
|
||||
/** \sa MatrixBase::coeff()
|
||||
* \warning the coordinates must fit into the referenced triangular part
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar coeff(Index row, Index col) const
|
||||
{
|
||||
Base::check_coordinates_internal(row, col);
|
||||
return m_matrix.coeff(row, col);
|
||||
}
|
||||
|
||||
/** \sa MatrixBase::coeffRef()
|
||||
* \warning the coordinates must fit into the referenced triangular part
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index row, Index col)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(SelfAdjointView);
|
||||
Base::check_coordinates_internal(row, col);
|
||||
return m_matrix.coeffRef(row, col);
|
||||
}
|
||||
|
||||
/** \internal */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const MatrixTypeNestedCleaned& _expression() const { return m_matrix; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
MatrixTypeNestedCleaned& nestedExpression() { return m_matrix; }
|
||||
|
||||
/** Efficient triangular matrix times vector/matrix product */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Product<SelfAdjointView,OtherDerived>
|
||||
operator*(const MatrixBase<OtherDerived>& rhs) const
|
||||
{
|
||||
return Product<SelfAdjointView,OtherDerived>(*this, rhs.derived());
|
||||
}
|
||||
|
||||
/** Efficient vector/matrix times triangular matrix product */
|
||||
template<typename OtherDerived> friend
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Product<OtherDerived,SelfAdjointView>
|
||||
operator*(const MatrixBase<OtherDerived>& lhs, const SelfAdjointView& rhs)
|
||||
{
|
||||
return Product<OtherDerived,SelfAdjointView>(lhs.derived(),rhs);
|
||||
}
|
||||
|
||||
friend EIGEN_DEVICE_FUNC
|
||||
const SelfAdjointView<const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,MatrixType,product),UpLo>
|
||||
operator*(const Scalar& s, const SelfAdjointView& mat)
|
||||
{
|
||||
return (s*mat.nestedExpression()).template selfadjointView<UpLo>();
|
||||
}
|
||||
|
||||
/** Perform a symmetric rank 2 update of the selfadjoint matrix \c *this:
|
||||
* \f$ this = this + \alpha u v^* + conj(\alpha) v u^* \f$
|
||||
* \returns a reference to \c *this
|
||||
*
|
||||
* The vectors \a u and \c v \b must be column vectors, however they can be
|
||||
* a adjoint expression without any overhead. Only the meaningful triangular
|
||||
* part of the matrix is updated, the rest is left unchanged.
|
||||
*
|
||||
* \sa rankUpdate(const MatrixBase<DerivedU>&, Scalar)
|
||||
*/
|
||||
template<typename DerivedU, typename DerivedV>
|
||||
EIGEN_DEVICE_FUNC
|
||||
SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha = Scalar(1));
|
||||
|
||||
/** Perform a symmetric rank K update of the selfadjoint matrix \c *this:
|
||||
* \f$ this = this + \alpha ( u u^* ) \f$ where \a u is a vector or matrix.
|
||||
*
|
||||
* \returns a reference to \c *this
|
||||
*
|
||||
* Note that to perform \f$ this = this + \alpha ( u^* u ) \f$ you can simply
|
||||
* call this function with u.adjoint().
|
||||
*
|
||||
* \sa rankUpdate(const MatrixBase<DerivedU>&, const MatrixBase<DerivedV>&, Scalar)
|
||||
*/
|
||||
template<typename DerivedU>
|
||||
EIGEN_DEVICE_FUNC
|
||||
SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha = Scalar(1));
|
||||
|
||||
/** \returns an expression of a triangular view extracted from the current selfadjoint view of a given triangular part
|
||||
*
|
||||
* The parameter \a TriMode can have the following values: \c #Upper, \c #StrictlyUpper, \c #UnitUpper,
|
||||
* \c #Lower, \c #StrictlyLower, \c #UnitLower.
|
||||
*
|
||||
* If \c TriMode references the same triangular part than \c *this, then this method simply return a \c TriangularView of the nested expression,
|
||||
* otherwise, the nested expression is first transposed, thus returning a \c TriangularView<Transpose<MatrixType>> object.
|
||||
*
|
||||
* \sa MatrixBase::triangularView(), class TriangularView
|
||||
*/
|
||||
template<unsigned int TriMode>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)),
|
||||
TriangularView<MatrixType,TriMode>,
|
||||
TriangularView<typename MatrixType::AdjointReturnType,TriMode> >::type
|
||||
triangularView() const
|
||||
{
|
||||
typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)), MatrixType&, typename MatrixType::ConstTransposeReturnType>::type tmp1(m_matrix);
|
||||
typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)), MatrixType&, typename MatrixType::AdjointReturnType>::type tmp2(tmp1);
|
||||
return typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)),
|
||||
TriangularView<MatrixType,TriMode>,
|
||||
TriangularView<typename MatrixType::AdjointReturnType,TriMode> >::type(tmp2);
|
||||
}
|
||||
|
||||
typedef SelfAdjointView<const MatrixConjugateReturnType,UpLo> ConjugateReturnType;
|
||||
/** \sa MatrixBase::conjugate() const */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const ConjugateReturnType conjugate() const
|
||||
{ return ConjugateReturnType(m_matrix.conjugate()); }
|
||||
|
||||
typedef SelfAdjointView<const typename MatrixType::AdjointReturnType,TransposeMode> AdjointReturnType;
|
||||
/** \sa MatrixBase::adjoint() const */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const AdjointReturnType adjoint() const
|
||||
{ return AdjointReturnType(m_matrix.adjoint()); }
|
||||
|
||||
typedef SelfAdjointView<typename MatrixType::TransposeReturnType,TransposeMode> TransposeReturnType;
|
||||
/** \sa MatrixBase::transpose() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline TransposeReturnType transpose()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
|
||||
typename MatrixType::TransposeReturnType tmp(m_matrix);
|
||||
return TransposeReturnType(tmp);
|
||||
}
|
||||
|
||||
typedef SelfAdjointView<const typename MatrixType::ConstTransposeReturnType,TransposeMode> ConstTransposeReturnType;
|
||||
/** \sa MatrixBase::transpose() const */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const ConstTransposeReturnType transpose() const
|
||||
{
|
||||
return ConstTransposeReturnType(m_matrix.transpose());
|
||||
}
|
||||
|
||||
/** \returns a const expression of the main diagonal of the matrix \c *this
|
||||
*
|
||||
* This method simply returns the diagonal of the nested expression, thus by-passing the SelfAdjointView decorator.
|
||||
*
|
||||
* \sa MatrixBase::diagonal(), class Diagonal */
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename MatrixType::ConstDiagonalReturnType diagonal() const
|
||||
{
|
||||
return typename MatrixType::ConstDiagonalReturnType(m_matrix);
|
||||
}
|
||||
|
||||
/////////// Cholesky module ///////////
|
||||
|
||||
const LLT<PlainObject, UpLo> llt() const;
|
||||
const LDLT<PlainObject, UpLo> ldlt() const;
|
||||
|
||||
/////////// Eigenvalue module ///////////
|
||||
|
||||
/** Real part of #Scalar */
|
||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||
/** Return type of eigenvalues() */
|
||||
typedef Matrix<RealScalar, internal::traits<MatrixType>::ColsAtCompileTime, 1> EigenvaluesReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EigenvaluesReturnType eigenvalues() const;
|
||||
EIGEN_DEVICE_FUNC
|
||||
RealScalar operatorNorm() const;
|
||||
|
||||
protected:
|
||||
MatrixTypeNested m_matrix;
|
||||
};
|
||||
|
||||
|
||||
// template<typename OtherDerived, typename MatrixType, unsigned int UpLo>
|
||||
// internal::selfadjoint_matrix_product_returntype<OtherDerived,SelfAdjointView<MatrixType,UpLo> >
|
||||
// operator*(const MatrixBase<OtherDerived>& lhs, const SelfAdjointView<MatrixType,UpLo>& rhs)
|
||||
// {
|
||||
// return internal::matrix_selfadjoint_product_returntype<OtherDerived,SelfAdjointView<MatrixType,UpLo> >(lhs.derived(),rhs);
|
||||
// }
|
||||
|
||||
// selfadjoint to dense matrix
|
||||
|
||||
namespace internal {
|
||||
|
||||
// TODO currently a selfadjoint expression has the form SelfAdjointView<.,.>
|
||||
// in the future selfadjoint-ness should be defined by the expression traits
|
||||
// such that Transpose<SelfAdjointView<.,.> > is valid. (currently TriangularBase::transpose() is overloaded to make it work)
|
||||
template<typename MatrixType, unsigned int Mode>
|
||||
struct evaluator_traits<SelfAdjointView<MatrixType,Mode> >
|
||||
{
|
||||
typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
|
||||
typedef SelfAdjointShape Shape;
|
||||
};
|
||||
|
||||
template<int UpLo, int SetOpposite, typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version>
|
||||
class triangular_dense_assignment_kernel<UpLo,SelfAdjoint,SetOpposite,DstEvaluatorTypeT,SrcEvaluatorTypeT,Functor,Version>
|
||||
: public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version>
|
||||
{
|
||||
protected:
|
||||
typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version> Base;
|
||||
typedef typename Base::DstXprType DstXprType;
|
||||
typedef typename Base::SrcXprType SrcXprType;
|
||||
using Base::m_dst;
|
||||
using Base::m_src;
|
||||
using Base::m_functor;
|
||||
public:
|
||||
|
||||
typedef typename Base::DstEvaluatorType DstEvaluatorType;
|
||||
typedef typename Base::SrcEvaluatorType SrcEvaluatorType;
|
||||
typedef typename Base::Scalar Scalar;
|
||||
typedef typename Base::AssignmentTraits AssignmentTraits;
|
||||
|
||||
|
||||
EIGEN_DEVICE_FUNC triangular_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
|
||||
: Base(dst, src, func, dstExpr)
|
||||
{}
|
||||
|
||||
EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col)
|
||||
{
|
||||
eigen_internal_assert(row!=col);
|
||||
Scalar tmp = m_src.coeff(row,col);
|
||||
m_functor.assignCoeff(m_dst.coeffRef(row,col), tmp);
|
||||
m_functor.assignCoeff(m_dst.coeffRef(col,row), numext::conj(tmp));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC void assignDiagonalCoeff(Index id)
|
||||
{
|
||||
Base::assignCoeff(id,id);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC void assignOppositeCoeff(Index, Index)
|
||||
{ eigen_internal_assert(false && "should never be called"); }
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/***************************************************************************
|
||||
* Implementation of MatrixBase methods
|
||||
***************************************************************************/
|
||||
|
||||
/** This is the const version of MatrixBase::selfadjointView() */
|
||||
template<typename Derived>
|
||||
template<unsigned int UpLo>
|
||||
typename MatrixBase<Derived>::template ConstSelfAdjointViewReturnType<UpLo>::Type
|
||||
MatrixBase<Derived>::selfadjointView() const
|
||||
{
|
||||
return typename ConstSelfAdjointViewReturnType<UpLo>::Type(derived());
|
||||
}
|
||||
|
||||
/** \returns an expression of a symmetric/self-adjoint view extracted from the upper or lower triangular part of the current matrix
|
||||
*
|
||||
* The parameter \a UpLo can be either \c #Upper or \c #Lower
|
||||
*
|
||||
* Example: \include MatrixBase_selfadjointView.cpp
|
||||
* Output: \verbinclude MatrixBase_selfadjointView.out
|
||||
*
|
||||
* \sa class SelfAdjointView
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<unsigned int UpLo>
|
||||
typename MatrixBase<Derived>::template SelfAdjointViewReturnType<UpLo>::Type
|
||||
MatrixBase<Derived>::selfadjointView()
|
||||
{
|
||||
return typename SelfAdjointViewReturnType<UpLo>::Type(derived());
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_SELFADJOINTMATRIX_H
|
||||
47
external/include/eigen3/Eigen/src/Core/SelfCwiseBinaryOp.h
vendored
Normal file
47
external/include/eigen3/Eigen/src/Core/SelfCwiseBinaryOp.h
vendored
Normal file
@@ -0,0 +1,47 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_SELFCWISEBINARYOP_H
|
||||
#define EIGEN_SELFCWISEBINARYOP_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
// TODO generalize the scalar type of 'other'
|
||||
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other)
|
||||
{
|
||||
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar,Scalar>());
|
||||
return derived();
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
|
||||
{
|
||||
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar,Scalar>());
|
||||
return derived();
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
|
||||
{
|
||||
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar,Scalar>());
|
||||
return derived();
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other)
|
||||
{
|
||||
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar,Scalar>());
|
||||
return derived();
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_SELFCWISEBINARYOP_H
|
||||
188
external/include/eigen3/Eigen/src/Core/Solve.h
vendored
Normal file
188
external/include/eigen3/Eigen/src/Core/Solve.h
vendored
Normal file
@@ -0,0 +1,188 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_SOLVE_H
|
||||
#define EIGEN_SOLVE_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
template<typename Decomposition, typename RhsType, typename StorageKind> class SolveImpl;
|
||||
|
||||
/** \class Solve
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Pseudo expression representing a solving operation
|
||||
*
|
||||
* \tparam Decomposition the type of the matrix or decomposion object
|
||||
* \tparam Rhstype the type of the right-hand side
|
||||
*
|
||||
* This class represents an expression of A.solve(B)
|
||||
* and most of the time this is the only way it is used.
|
||||
*
|
||||
*/
|
||||
namespace internal {
|
||||
|
||||
// this solve_traits class permits to determine the evaluation type with respect to storage kind (Dense vs Sparse)
|
||||
template<typename Decomposition, typename RhsType,typename StorageKind> struct solve_traits;
|
||||
|
||||
template<typename Decomposition, typename RhsType>
|
||||
struct solve_traits<Decomposition,RhsType,Dense>
|
||||
{
|
||||
typedef typename make_proper_matrix_type<typename RhsType::Scalar,
|
||||
Decomposition::ColsAtCompileTime,
|
||||
RhsType::ColsAtCompileTime,
|
||||
RhsType::PlainObject::Options,
|
||||
Decomposition::MaxColsAtCompileTime,
|
||||
RhsType::MaxColsAtCompileTime>::type PlainObject;
|
||||
};
|
||||
|
||||
template<typename Decomposition, typename RhsType>
|
||||
struct traits<Solve<Decomposition, RhsType> >
|
||||
: traits<typename solve_traits<Decomposition,RhsType,typename internal::traits<RhsType>::StorageKind>::PlainObject>
|
||||
{
|
||||
typedef typename solve_traits<Decomposition,RhsType,typename internal::traits<RhsType>::StorageKind>::PlainObject PlainObject;
|
||||
typedef typename promote_index_type<typename Decomposition::StorageIndex, typename RhsType::StorageIndex>::type StorageIndex;
|
||||
typedef traits<PlainObject> BaseTraits;
|
||||
enum {
|
||||
Flags = BaseTraits::Flags & RowMajorBit,
|
||||
CoeffReadCost = HugeCost
|
||||
};
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
|
||||
template<typename Decomposition, typename RhsType>
|
||||
class Solve : public SolveImpl<Decomposition,RhsType,typename internal::traits<RhsType>::StorageKind>
|
||||
{
|
||||
public:
|
||||
typedef typename internal::traits<Solve>::PlainObject PlainObject;
|
||||
typedef typename internal::traits<Solve>::StorageIndex StorageIndex;
|
||||
|
||||
Solve(const Decomposition &dec, const RhsType &rhs)
|
||||
: m_dec(dec), m_rhs(rhs)
|
||||
{}
|
||||
|
||||
EIGEN_DEVICE_FUNC Index rows() const { return m_dec.cols(); }
|
||||
EIGEN_DEVICE_FUNC Index cols() const { return m_rhs.cols(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC const Decomposition& dec() const { return m_dec; }
|
||||
EIGEN_DEVICE_FUNC const RhsType& rhs() const { return m_rhs; }
|
||||
|
||||
protected:
|
||||
const Decomposition &m_dec;
|
||||
const RhsType &m_rhs;
|
||||
};
|
||||
|
||||
|
||||
// Specialization of the Solve expression for dense results
|
||||
template<typename Decomposition, typename RhsType>
|
||||
class SolveImpl<Decomposition,RhsType,Dense>
|
||||
: public MatrixBase<Solve<Decomposition,RhsType> >
|
||||
{
|
||||
typedef Solve<Decomposition,RhsType> Derived;
|
||||
|
||||
public:
|
||||
|
||||
typedef MatrixBase<Solve<Decomposition,RhsType> > Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
|
||||
|
||||
private:
|
||||
|
||||
Scalar coeff(Index row, Index col) const;
|
||||
Scalar coeff(Index i) const;
|
||||
};
|
||||
|
||||
// Generic API dispatcher
|
||||
template<typename Decomposition, typename RhsType, typename StorageKind>
|
||||
class SolveImpl : public internal::generic_xpr_base<Solve<Decomposition,RhsType>, MatrixXpr, StorageKind>::type
|
||||
{
|
||||
public:
|
||||
typedef typename internal::generic_xpr_base<Solve<Decomposition,RhsType>, MatrixXpr, StorageKind>::type Base;
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
|
||||
// Evaluator of Solve -> eval into a temporary
|
||||
template<typename Decomposition, typename RhsType>
|
||||
struct evaluator<Solve<Decomposition,RhsType> >
|
||||
: public evaluator<typename Solve<Decomposition,RhsType>::PlainObject>
|
||||
{
|
||||
typedef Solve<Decomposition,RhsType> SolveType;
|
||||
typedef typename SolveType::PlainObject PlainObject;
|
||||
typedef evaluator<PlainObject> Base;
|
||||
|
||||
enum { Flags = Base::Flags | EvalBeforeNestingBit };
|
||||
|
||||
EIGEN_DEVICE_FUNC explicit evaluator(const SolveType& solve)
|
||||
: m_result(solve.rows(), solve.cols())
|
||||
{
|
||||
::new (static_cast<Base*>(this)) Base(m_result);
|
||||
solve.dec()._solve_impl(solve.rhs(), m_result);
|
||||
}
|
||||
|
||||
protected:
|
||||
PlainObject m_result;
|
||||
};
|
||||
|
||||
// Specialization for "dst = dec.solve(rhs)"
|
||||
// NOTE we need to specialize it for Dense2Dense to avoid ambiguous specialization error and a Sparse2Sparse specialization must exist somewhere
|
||||
template<typename DstXprType, typename DecType, typename RhsType, typename Scalar>
|
||||
struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar,Scalar>, Dense2Dense>
|
||||
{
|
||||
typedef Solve<DecType,RhsType> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
src.dec()._solve_impl(src.rhs(), dst);
|
||||
}
|
||||
};
|
||||
|
||||
// Specialization for "dst = dec.transpose().solve(rhs)"
|
||||
template<typename DstXprType, typename DecType, typename RhsType, typename Scalar>
|
||||
struct Assignment<DstXprType, Solve<Transpose<const DecType>,RhsType>, internal::assign_op<Scalar,Scalar>, Dense2Dense>
|
||||
{
|
||||
typedef Solve<Transpose<const DecType>,RhsType> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
src.dec().nestedExpression().template _solve_impl_transposed<false>(src.rhs(), dst);
|
||||
}
|
||||
};
|
||||
|
||||
// Specialization for "dst = dec.adjoint().solve(rhs)"
|
||||
template<typename DstXprType, typename DecType, typename RhsType, typename Scalar>
|
||||
struct Assignment<DstXprType, Solve<CwiseUnaryOp<internal::scalar_conjugate_op<typename DecType::Scalar>, const Transpose<const DecType> >,RhsType>,
|
||||
internal::assign_op<Scalar,Scalar>, Dense2Dense>
|
||||
{
|
||||
typedef Solve<CwiseUnaryOp<internal::scalar_conjugate_op<typename DecType::Scalar>, const Transpose<const DecType> >,RhsType> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
src.dec().nestedExpression().nestedExpression().template _solve_impl_transposed<true>(src.rhs(), dst);
|
||||
}
|
||||
};
|
||||
|
||||
} // end namepsace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_SOLVE_H
|
||||
235
external/include/eigen3/Eigen/src/Core/SolveTriangular.h
vendored
Normal file
235
external/include/eigen3/Eigen/src/Core/SolveTriangular.h
vendored
Normal file
@@ -0,0 +1,235 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_SOLVETRIANGULAR_H
|
||||
#define EIGEN_SOLVETRIANGULAR_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
// Forward declarations:
|
||||
// The following two routines are implemented in the products/TriangularSolver*.h files
|
||||
template<typename LhsScalar, typename RhsScalar, typename Index, int Side, int Mode, bool Conjugate, int StorageOrder>
|
||||
struct triangular_solve_vector;
|
||||
|
||||
template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder, int OtherStorageOrder>
|
||||
struct triangular_solve_matrix;
|
||||
|
||||
// small helper struct extracting some traits on the underlying solver operation
|
||||
template<typename Lhs, typename Rhs, int Side>
|
||||
class trsolve_traits
|
||||
{
|
||||
private:
|
||||
enum {
|
||||
RhsIsVectorAtCompileTime = (Side==OnTheLeft ? Rhs::ColsAtCompileTime : Rhs::RowsAtCompileTime)==1
|
||||
};
|
||||
public:
|
||||
enum {
|
||||
Unrolling = (RhsIsVectorAtCompileTime && Rhs::SizeAtCompileTime != Dynamic && Rhs::SizeAtCompileTime <= 8)
|
||||
? CompleteUnrolling : NoUnrolling,
|
||||
RhsVectors = RhsIsVectorAtCompileTime ? 1 : Dynamic
|
||||
};
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs,
|
||||
int Side, // can be OnTheLeft/OnTheRight
|
||||
int Mode, // can be Upper/Lower | UnitDiag
|
||||
int Unrolling = trsolve_traits<Lhs,Rhs,Side>::Unrolling,
|
||||
int RhsVectors = trsolve_traits<Lhs,Rhs,Side>::RhsVectors
|
||||
>
|
||||
struct triangular_solver_selector;
|
||||
|
||||
template<typename Lhs, typename Rhs, int Side, int Mode>
|
||||
struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,1>
|
||||
{
|
||||
typedef typename Lhs::Scalar LhsScalar;
|
||||
typedef typename Rhs::Scalar RhsScalar;
|
||||
typedef blas_traits<Lhs> LhsProductTraits;
|
||||
typedef typename LhsProductTraits::ExtractType ActualLhsType;
|
||||
typedef Map<Matrix<RhsScalar,Dynamic,1>, Aligned> MappedRhs;
|
||||
static void run(const Lhs& lhs, Rhs& rhs)
|
||||
{
|
||||
ActualLhsType actualLhs = LhsProductTraits::extract(lhs);
|
||||
|
||||
// FIXME find a way to allow an inner stride if packet_traits<Scalar>::size==1
|
||||
|
||||
bool useRhsDirectly = Rhs::InnerStrideAtCompileTime==1 || rhs.innerStride()==1;
|
||||
|
||||
ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhs,rhs.size(),
|
||||
(useRhsDirectly ? rhs.data() : 0));
|
||||
|
||||
if(!useRhsDirectly)
|
||||
MappedRhs(actualRhs,rhs.size()) = rhs;
|
||||
|
||||
triangular_solve_vector<LhsScalar, RhsScalar, Index, Side, Mode, LhsProductTraits::NeedToConjugate,
|
||||
(int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor>
|
||||
::run(actualLhs.cols(), actualLhs.data(), actualLhs.outerStride(), actualRhs);
|
||||
|
||||
if(!useRhsDirectly)
|
||||
rhs = MappedRhs(actualRhs, rhs.size());
|
||||
}
|
||||
};
|
||||
|
||||
// the rhs is a matrix
|
||||
template<typename Lhs, typename Rhs, int Side, int Mode>
|
||||
struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic>
|
||||
{
|
||||
typedef typename Rhs::Scalar Scalar;
|
||||
typedef blas_traits<Lhs> LhsProductTraits;
|
||||
typedef typename LhsProductTraits::DirectLinearAccessType ActualLhsType;
|
||||
|
||||
static void run(const Lhs& lhs, Rhs& rhs)
|
||||
{
|
||||
typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsProductTraits::extract(lhs);
|
||||
|
||||
const Index size = lhs.rows();
|
||||
const Index othersize = Side==OnTheLeft? rhs.cols() : rhs.rows();
|
||||
|
||||
typedef internal::gemm_blocking_space<(Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
|
||||
Rhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxRowsAtCompileTime,4> BlockingType;
|
||||
|
||||
BlockingType blocking(rhs.rows(), rhs.cols(), size, 1, false);
|
||||
|
||||
triangular_solve_matrix<Scalar,Index,Side,Mode,LhsProductTraits::NeedToConjugate,(int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor,
|
||||
(Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor>
|
||||
::run(size, othersize, &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &rhs.coeffRef(0,0), rhs.outerStride(), blocking);
|
||||
}
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
* meta-unrolling implementation
|
||||
***************************************************************************/
|
||||
|
||||
template<typename Lhs, typename Rhs, int Mode, int LoopIndex, int Size,
|
||||
bool Stop = LoopIndex==Size>
|
||||
struct triangular_solver_unroller;
|
||||
|
||||
template<typename Lhs, typename Rhs, int Mode, int LoopIndex, int Size>
|
||||
struct triangular_solver_unroller<Lhs,Rhs,Mode,LoopIndex,Size,false> {
|
||||
enum {
|
||||
IsLower = ((Mode&Lower)==Lower),
|
||||
DiagIndex = IsLower ? LoopIndex : Size - LoopIndex - 1,
|
||||
StartIndex = IsLower ? 0 : DiagIndex+1
|
||||
};
|
||||
static void run(const Lhs& lhs, Rhs& rhs)
|
||||
{
|
||||
if (LoopIndex>0)
|
||||
rhs.coeffRef(DiagIndex) -= lhs.row(DiagIndex).template segment<LoopIndex>(StartIndex).transpose()
|
||||
.cwiseProduct(rhs.template segment<LoopIndex>(StartIndex)).sum();
|
||||
|
||||
if(!(Mode & UnitDiag))
|
||||
rhs.coeffRef(DiagIndex) /= lhs.coeff(DiagIndex,DiagIndex);
|
||||
|
||||
triangular_solver_unroller<Lhs,Rhs,Mode,LoopIndex+1,Size>::run(lhs,rhs);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, int Mode, int LoopIndex, int Size>
|
||||
struct triangular_solver_unroller<Lhs,Rhs,Mode,LoopIndex,Size,true> {
|
||||
static void run(const Lhs&, Rhs&) {}
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, int Mode>
|
||||
struct triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,CompleteUnrolling,1> {
|
||||
static void run(const Lhs& lhs, Rhs& rhs)
|
||||
{ triangular_solver_unroller<Lhs,Rhs,Mode,0,Rhs::SizeAtCompileTime>::run(lhs,rhs); }
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, int Mode>
|
||||
struct triangular_solver_selector<Lhs,Rhs,OnTheRight,Mode,CompleteUnrolling,1> {
|
||||
static void run(const Lhs& lhs, Rhs& rhs)
|
||||
{
|
||||
Transpose<const Lhs> trLhs(lhs);
|
||||
Transpose<Rhs> trRhs(rhs);
|
||||
|
||||
triangular_solver_unroller<Transpose<const Lhs>,Transpose<Rhs>,
|
||||
((Mode&Upper)==Upper ? Lower : Upper) | (Mode&UnitDiag),
|
||||
0,Rhs::SizeAtCompileTime>::run(trLhs,trRhs);
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/***************************************************************************
|
||||
* TriangularView methods
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
template<typename MatrixType, unsigned int Mode>
|
||||
template<int Side, typename OtherDerived>
|
||||
void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<OtherDerived>& _other) const
|
||||
{
|
||||
OtherDerived& other = _other.const_cast_derived();
|
||||
eigen_assert( derived().cols() == derived().rows() && ((Side==OnTheLeft && derived().cols() == other.rows()) || (Side==OnTheRight && derived().cols() == other.cols())) );
|
||||
eigen_assert((!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower)));
|
||||
// If solving for a 0x0 matrix, nothing to do, simply return.
|
||||
if (derived().cols() == 0)
|
||||
return;
|
||||
|
||||
enum { copy = (internal::traits<OtherDerived>::Flags & RowMajorBit) && OtherDerived::IsVectorAtCompileTime && OtherDerived::SizeAtCompileTime!=1};
|
||||
typedef typename internal::conditional<copy,
|
||||
typename internal::plain_matrix_type_column_major<OtherDerived>::type, OtherDerived&>::type OtherCopy;
|
||||
OtherCopy otherCopy(other);
|
||||
|
||||
internal::triangular_solver_selector<MatrixType, typename internal::remove_reference<OtherCopy>::type,
|
||||
Side, Mode>::run(derived().nestedExpression(), otherCopy);
|
||||
|
||||
if (copy)
|
||||
other = otherCopy;
|
||||
}
|
||||
|
||||
template<typename Derived, unsigned int Mode>
|
||||
template<int Side, typename Other>
|
||||
const internal::triangular_solve_retval<Side,TriangularView<Derived,Mode>,Other>
|
||||
TriangularViewImpl<Derived,Mode,Dense>::solve(const MatrixBase<Other>& other) const
|
||||
{
|
||||
return internal::triangular_solve_retval<Side,TriangularViewType,Other>(derived(), other.derived());
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace internal {
|
||||
|
||||
|
||||
template<int Side, typename TriangularType, typename Rhs>
|
||||
struct traits<triangular_solve_retval<Side, TriangularType, Rhs> >
|
||||
{
|
||||
typedef typename internal::plain_matrix_type_column_major<Rhs>::type ReturnType;
|
||||
};
|
||||
|
||||
template<int Side, typename TriangularType, typename Rhs> struct triangular_solve_retval
|
||||
: public ReturnByValue<triangular_solve_retval<Side, TriangularType, Rhs> >
|
||||
{
|
||||
typedef typename remove_all<typename Rhs::Nested>::type RhsNestedCleaned;
|
||||
typedef ReturnByValue<triangular_solve_retval> Base;
|
||||
|
||||
triangular_solve_retval(const TriangularType& tri, const Rhs& rhs)
|
||||
: m_triangularMatrix(tri), m_rhs(rhs)
|
||||
{}
|
||||
|
||||
inline Index rows() const { return m_rhs.rows(); }
|
||||
inline Index cols() const { return m_rhs.cols(); }
|
||||
|
||||
template<typename Dest> inline void evalTo(Dest& dst) const
|
||||
{
|
||||
if(!is_same_dense(dst,m_rhs))
|
||||
dst = m_rhs;
|
||||
m_triangularMatrix.template solveInPlace<Side>(dst);
|
||||
}
|
||||
|
||||
protected:
|
||||
const TriangularType& m_triangularMatrix;
|
||||
typename Rhs::Nested m_rhs;
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_SOLVETRIANGULAR_H
|
||||
130
external/include/eigen3/Eigen/src/Core/SolverBase.h
vendored
Normal file
130
external/include/eigen3/Eigen/src/Core/SolverBase.h
vendored
Normal file
@@ -0,0 +1,130 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_SOLVERBASE_H
|
||||
#define EIGEN_SOLVERBASE_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \class SolverBase
|
||||
* \brief A base class for matrix decomposition and solvers
|
||||
*
|
||||
* \tparam Derived the actual type of the decomposition/solver.
|
||||
*
|
||||
* Any matrix decomposition inheriting this base class provide the following API:
|
||||
*
|
||||
* \code
|
||||
* MatrixType A, b, x;
|
||||
* DecompositionType dec(A);
|
||||
* x = dec.solve(b); // solve A * x = b
|
||||
* x = dec.transpose().solve(b); // solve A^T * x = b
|
||||
* x = dec.adjoint().solve(b); // solve A' * x = b
|
||||
* \endcode
|
||||
*
|
||||
* \warning Currently, any other usage of transpose() and adjoint() are not supported and will produce compilation errors.
|
||||
*
|
||||
* \sa class PartialPivLU, class FullPivLU
|
||||
*/
|
||||
template<typename Derived>
|
||||
class SolverBase : public EigenBase<Derived>
|
||||
{
|
||||
public:
|
||||
|
||||
typedef EigenBase<Derived> Base;
|
||||
typedef typename internal::traits<Derived>::Scalar Scalar;
|
||||
typedef Scalar CoeffReturnType;
|
||||
|
||||
enum {
|
||||
RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
|
||||
ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
|
||||
SizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::RowsAtCompileTime,
|
||||
internal::traits<Derived>::ColsAtCompileTime>::ret),
|
||||
MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,
|
||||
MaxSizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::MaxRowsAtCompileTime,
|
||||
internal::traits<Derived>::MaxColsAtCompileTime>::ret),
|
||||
IsVectorAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime == 1
|
||||
|| internal::traits<Derived>::MaxColsAtCompileTime == 1
|
||||
};
|
||||
|
||||
/** Default constructor */
|
||||
SolverBase()
|
||||
{}
|
||||
|
||||
~SolverBase()
|
||||
{}
|
||||
|
||||
using Base::derived;
|
||||
|
||||
/** \returns an expression of the solution x of \f$ A x = b \f$ using the current decomposition of A.
|
||||
*/
|
||||
template<typename Rhs>
|
||||
inline const Solve<Derived, Rhs>
|
||||
solve(const MatrixBase<Rhs>& b) const
|
||||
{
|
||||
eigen_assert(derived().rows()==b.rows() && "solve(): invalid number of rows of the right hand side matrix b");
|
||||
return Solve<Derived, Rhs>(derived(), b.derived());
|
||||
}
|
||||
|
||||
/** \internal the return type of transpose() */
|
||||
typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;
|
||||
/** \returns an expression of the transposed of the factored matrix.
|
||||
*
|
||||
* A typical usage is to solve for the transposed problem A^T x = b:
|
||||
* \code x = dec.transpose().solve(b); \endcode
|
||||
*
|
||||
* \sa adjoint(), solve()
|
||||
*/
|
||||
inline ConstTransposeReturnType transpose() const
|
||||
{
|
||||
return ConstTransposeReturnType(derived());
|
||||
}
|
||||
|
||||
/** \internal the return type of adjoint() */
|
||||
typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
|
||||
CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, ConstTransposeReturnType>,
|
||||
ConstTransposeReturnType
|
||||
>::type AdjointReturnType;
|
||||
/** \returns an expression of the adjoint of the factored matrix
|
||||
*
|
||||
* A typical usage is to solve for the adjoint problem A' x = b:
|
||||
* \code x = dec.adjoint().solve(b); \endcode
|
||||
*
|
||||
* For real scalar types, this function is equivalent to transpose().
|
||||
*
|
||||
* \sa transpose(), solve()
|
||||
*/
|
||||
inline AdjointReturnType adjoint() const
|
||||
{
|
||||
return AdjointReturnType(derived().transpose());
|
||||
}
|
||||
|
||||
protected:
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Derived>
|
||||
struct generic_xpr_base<Derived, MatrixXpr, SolverStorage>
|
||||
{
|
||||
typedef SolverBase<Derived> type;
|
||||
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_SOLVERBASE_H
|
||||
221
external/include/eigen3/Eigen/src/Core/StableNorm.h
vendored
Normal file
221
external/include/eigen3/Eigen/src/Core/StableNorm.h
vendored
Normal file
@@ -0,0 +1,221 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_STABLENORM_H
|
||||
#define EIGEN_STABLENORM_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename ExpressionType, typename Scalar>
|
||||
inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& scale, Scalar& invScale)
|
||||
{
|
||||
Scalar maxCoeff = bl.cwiseAbs().maxCoeff();
|
||||
|
||||
if(maxCoeff>scale)
|
||||
{
|
||||
ssq = ssq * numext::abs2(scale/maxCoeff);
|
||||
Scalar tmp = Scalar(1)/maxCoeff;
|
||||
if(tmp > NumTraits<Scalar>::highest())
|
||||
{
|
||||
invScale = NumTraits<Scalar>::highest();
|
||||
scale = Scalar(1)/invScale;
|
||||
}
|
||||
else if(maxCoeff>NumTraits<Scalar>::highest()) // we got a INF
|
||||
{
|
||||
invScale = Scalar(1);
|
||||
scale = maxCoeff;
|
||||
}
|
||||
else
|
||||
{
|
||||
scale = maxCoeff;
|
||||
invScale = tmp;
|
||||
}
|
||||
}
|
||||
else if(maxCoeff!=maxCoeff) // we got a NaN
|
||||
{
|
||||
scale = maxCoeff;
|
||||
}
|
||||
|
||||
// TODO if the maxCoeff is much much smaller than the current scale,
|
||||
// then we can neglect this sub vector
|
||||
if(scale>Scalar(0)) // if scale==0, then bl is 0
|
||||
ssq += (bl*invScale).squaredNorm();
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
inline typename NumTraits<typename traits<Derived>::Scalar>::Real
|
||||
blueNorm_impl(const EigenBase<Derived>& _vec)
|
||||
{
|
||||
typedef typename Derived::RealScalar RealScalar;
|
||||
using std::pow;
|
||||
using std::sqrt;
|
||||
using std::abs;
|
||||
const Derived& vec(_vec.derived());
|
||||
static bool initialized = false;
|
||||
static RealScalar b1, b2, s1m, s2m, rbig, relerr;
|
||||
if(!initialized)
|
||||
{
|
||||
int ibeta, it, iemin, iemax, iexp;
|
||||
RealScalar eps;
|
||||
// This program calculates the machine-dependent constants
|
||||
// bl, b2, slm, s2m, relerr overfl
|
||||
// from the "basic" machine-dependent numbers
|
||||
// nbig, ibeta, it, iemin, iemax, rbig.
|
||||
// The following define the basic machine-dependent constants.
|
||||
// For portability, the PORT subprograms "ilmaeh" and "rlmach"
|
||||
// are used. For any specific computer, each of the assignment
|
||||
// statements can be replaced
|
||||
ibeta = std::numeric_limits<RealScalar>::radix; // base for floating-point numbers
|
||||
it = std::numeric_limits<RealScalar>::digits; // number of base-beta digits in mantissa
|
||||
iemin = std::numeric_limits<RealScalar>::min_exponent; // minimum exponent
|
||||
iemax = std::numeric_limits<RealScalar>::max_exponent; // maximum exponent
|
||||
rbig = (std::numeric_limits<RealScalar>::max)(); // largest floating-point number
|
||||
|
||||
iexp = -((1-iemin)/2);
|
||||
b1 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // lower boundary of midrange
|
||||
iexp = (iemax + 1 - it)/2;
|
||||
b2 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // upper boundary of midrange
|
||||
|
||||
iexp = (2-iemin)/2;
|
||||
s1m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for lower range
|
||||
iexp = - ((iemax+it)/2);
|
||||
s2m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for upper range
|
||||
|
||||
eps = RealScalar(pow(double(ibeta), 1-it));
|
||||
relerr = sqrt(eps); // tolerance for neglecting asml
|
||||
initialized = true;
|
||||
}
|
||||
Index n = vec.size();
|
||||
RealScalar ab2 = b2 / RealScalar(n);
|
||||
RealScalar asml = RealScalar(0);
|
||||
RealScalar amed = RealScalar(0);
|
||||
RealScalar abig = RealScalar(0);
|
||||
for(typename Derived::InnerIterator it(vec, 0); it; ++it)
|
||||
{
|
||||
RealScalar ax = abs(it.value());
|
||||
if(ax > ab2) abig += numext::abs2(ax*s2m);
|
||||
else if(ax < b1) asml += numext::abs2(ax*s1m);
|
||||
else amed += numext::abs2(ax);
|
||||
}
|
||||
if(amed!=amed)
|
||||
return amed; // we got a NaN
|
||||
if(abig > RealScalar(0))
|
||||
{
|
||||
abig = sqrt(abig);
|
||||
if(abig > rbig) // overflow, or *this contains INF values
|
||||
return abig; // return INF
|
||||
if(amed > RealScalar(0))
|
||||
{
|
||||
abig = abig/s2m;
|
||||
amed = sqrt(amed);
|
||||
}
|
||||
else
|
||||
return abig/s2m;
|
||||
}
|
||||
else if(asml > RealScalar(0))
|
||||
{
|
||||
if (amed > RealScalar(0))
|
||||
{
|
||||
abig = sqrt(amed);
|
||||
amed = sqrt(asml) / s1m;
|
||||
}
|
||||
else
|
||||
return sqrt(asml)/s1m;
|
||||
}
|
||||
else
|
||||
return sqrt(amed);
|
||||
asml = numext::mini(abig, amed);
|
||||
abig = numext::maxi(abig, amed);
|
||||
if(asml <= abig*relerr)
|
||||
return abig;
|
||||
else
|
||||
return abig * sqrt(RealScalar(1) + numext::abs2(asml/abig));
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \returns the \em l2 norm of \c *this avoiding underflow and overflow.
|
||||
* This version use a blockwise two passes algorithm:
|
||||
* 1 - find the absolute largest coefficient \c s
|
||||
* 2 - compute \f$ s \Vert \frac{*this}{s} \Vert \f$ in a standard way
|
||||
*
|
||||
* For architecture/scalar types supporting vectorization, this version
|
||||
* is faster than blueNorm(). Otherwise the blueNorm() is much faster.
|
||||
*
|
||||
* \sa norm(), blueNorm(), hypotNorm()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
|
||||
MatrixBase<Derived>::stableNorm() const
|
||||
{
|
||||
using std::sqrt;
|
||||
using std::abs;
|
||||
const Index blockSize = 4096;
|
||||
RealScalar scale(0);
|
||||
RealScalar invScale(1);
|
||||
RealScalar ssq(0); // sum of square
|
||||
|
||||
typedef typename internal::nested_eval<Derived,2>::type DerivedCopy;
|
||||
typedef typename internal::remove_all<DerivedCopy>::type DerivedCopyClean;
|
||||
const DerivedCopy copy(derived());
|
||||
|
||||
enum {
|
||||
CanAlign = ( (int(DerivedCopyClean::Flags)&DirectAccessBit)
|
||||
|| (int(internal::evaluator<DerivedCopyClean>::Alignment)>0) // FIXME Alignment)>0 might not be enough
|
||||
) && (blockSize*sizeof(Scalar)*2<EIGEN_STACK_ALLOCATION_LIMIT)
|
||||
&& (EIGEN_MAX_STATIC_ALIGN_BYTES>0) // if we cannot allocate on the stack, then let's not bother about this optimization
|
||||
};
|
||||
typedef typename internal::conditional<CanAlign, Ref<const Matrix<Scalar,Dynamic,1,0,blockSize,1>, internal::evaluator<DerivedCopyClean>::Alignment>,
|
||||
typename DerivedCopyClean::ConstSegmentReturnType>::type SegmentWrapper;
|
||||
Index n = size();
|
||||
|
||||
if(n==1)
|
||||
return abs(this->coeff(0));
|
||||
|
||||
Index bi = internal::first_default_aligned(copy);
|
||||
if (bi>0)
|
||||
internal::stable_norm_kernel(copy.head(bi), ssq, scale, invScale);
|
||||
for (; bi<n; bi+=blockSize)
|
||||
internal::stable_norm_kernel(SegmentWrapper(copy.segment(bi,numext::mini(blockSize, n - bi))), ssq, scale, invScale);
|
||||
return scale * sqrt(ssq);
|
||||
}
|
||||
|
||||
/** \returns the \em l2 norm of \c *this using the Blue's algorithm.
|
||||
* A Portable Fortran Program to Find the Euclidean Norm of a Vector,
|
||||
* ACM TOMS, Vol 4, Issue 1, 1978.
|
||||
*
|
||||
* For architecture/scalar types without vectorization, this version
|
||||
* is much faster than stableNorm(). Otherwise the stableNorm() is faster.
|
||||
*
|
||||
* \sa norm(), stableNorm(), hypotNorm()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
|
||||
MatrixBase<Derived>::blueNorm() const
|
||||
{
|
||||
return internal::blueNorm_impl(*this);
|
||||
}
|
||||
|
||||
/** \returns the \em l2 norm of \c *this avoiding undeflow and overflow.
|
||||
* This version use a concatenation of hypot() calls, and it is very slow.
|
||||
*
|
||||
* \sa norm(), stableNorm()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
|
||||
MatrixBase<Derived>::hypotNorm() const
|
||||
{
|
||||
return this->cwiseAbs().redux(internal::scalar_hypot_op<RealScalar>());
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_STABLENORM_H
|
||||
111
external/include/eigen3/Eigen/src/Core/Stride.h
vendored
Normal file
111
external/include/eigen3/Eigen/src/Core/Stride.h
vendored
Normal file
@@ -0,0 +1,111 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2010 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_STRIDE_H
|
||||
#define EIGEN_STRIDE_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class Stride
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Holds strides information for Map
|
||||
*
|
||||
* This class holds the strides information for mapping arrays with strides with class Map.
|
||||
*
|
||||
* It holds two values: the inner stride and the outer stride.
|
||||
*
|
||||
* The inner stride is the pointer increment between two consecutive entries within a given row of a
|
||||
* row-major matrix or within a given column of a column-major matrix.
|
||||
*
|
||||
* The outer stride is the pointer increment between two consecutive rows of a row-major matrix or
|
||||
* between two consecutive columns of a column-major matrix.
|
||||
*
|
||||
* These two values can be passed either at compile-time as template parameters, or at runtime as
|
||||
* arguments to the constructor.
|
||||
*
|
||||
* Indeed, this class takes two template parameters:
|
||||
* \tparam _OuterStrideAtCompileTime the outer stride, or Dynamic if you want to specify it at runtime.
|
||||
* \tparam _InnerStrideAtCompileTime the inner stride, or Dynamic if you want to specify it at runtime.
|
||||
*
|
||||
* Here is an example:
|
||||
* \include Map_general_stride.cpp
|
||||
* Output: \verbinclude Map_general_stride.out
|
||||
*
|
||||
* \sa class InnerStride, class OuterStride, \ref TopicStorageOrders
|
||||
*/
|
||||
template<int _OuterStrideAtCompileTime, int _InnerStrideAtCompileTime>
|
||||
class Stride
|
||||
{
|
||||
public:
|
||||
typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
|
||||
enum {
|
||||
InnerStrideAtCompileTime = _InnerStrideAtCompileTime,
|
||||
OuterStrideAtCompileTime = _OuterStrideAtCompileTime
|
||||
};
|
||||
|
||||
/** Default constructor, for use when strides are fixed at compile time */
|
||||
EIGEN_DEVICE_FUNC
|
||||
Stride()
|
||||
: m_outer(OuterStrideAtCompileTime), m_inner(InnerStrideAtCompileTime)
|
||||
{
|
||||
eigen_assert(InnerStrideAtCompileTime != Dynamic && OuterStrideAtCompileTime != Dynamic);
|
||||
}
|
||||
|
||||
/** Constructor allowing to pass the strides at runtime */
|
||||
EIGEN_DEVICE_FUNC
|
||||
Stride(Index outerStride, Index innerStride)
|
||||
: m_outer(outerStride), m_inner(innerStride)
|
||||
{
|
||||
eigen_assert(innerStride>=0 && outerStride>=0);
|
||||
}
|
||||
|
||||
/** Copy constructor */
|
||||
EIGEN_DEVICE_FUNC
|
||||
Stride(const Stride& other)
|
||||
: m_outer(other.outer()), m_inner(other.inner())
|
||||
{}
|
||||
|
||||
/** \returns the outer stride */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outer() const { return m_outer.value(); }
|
||||
/** \returns the inner stride */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index inner() const { return m_inner.value(); }
|
||||
|
||||
protected:
|
||||
internal::variable_if_dynamic<Index, OuterStrideAtCompileTime> m_outer;
|
||||
internal::variable_if_dynamic<Index, InnerStrideAtCompileTime> m_inner;
|
||||
};
|
||||
|
||||
/** \brief Convenience specialization of Stride to specify only an inner stride
|
||||
* See class Map for some examples */
|
||||
template<int Value>
|
||||
class InnerStride : public Stride<0, Value>
|
||||
{
|
||||
typedef Stride<0, Value> Base;
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC InnerStride() : Base() {}
|
||||
EIGEN_DEVICE_FUNC InnerStride(Index v) : Base(0, v) {} // FIXME making this explicit could break valid code
|
||||
};
|
||||
|
||||
/** \brief Convenience specialization of Stride to specify only an outer stride
|
||||
* See class Map for some examples */
|
||||
template<int Value>
|
||||
class OuterStride : public Stride<Value, 0>
|
||||
{
|
||||
typedef Stride<Value, 0> Base;
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC OuterStride() : Base() {}
|
||||
EIGEN_DEVICE_FUNC OuterStride(Index v) : Base(v,0) {} // FIXME making this explicit could break valid code
|
||||
};
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_STRIDE_H
|
||||
67
external/include/eigen3/Eigen/src/Core/Swap.h
vendored
Normal file
67
external/include/eigen3/Eigen/src/Core/Swap.h
vendored
Normal file
@@ -0,0 +1,67 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_SWAP_H
|
||||
#define EIGEN_SWAP_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
// Overload default assignPacket behavior for swapping them
|
||||
template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT>
|
||||
class generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar>, Specialized>
|
||||
: public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar>, BuiltIn>
|
||||
{
|
||||
protected:
|
||||
typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar>, BuiltIn> Base;
|
||||
using Base::m_dst;
|
||||
using Base::m_src;
|
||||
using Base::m_functor;
|
||||
|
||||
public:
|
||||
typedef typename Base::Scalar Scalar;
|
||||
typedef typename Base::DstXprType DstXprType;
|
||||
typedef swap_assign_op<Scalar> Functor;
|
||||
|
||||
EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, const Functor &func, DstXprType& dstExpr)
|
||||
: Base(dst, src, func, dstExpr)
|
||||
{}
|
||||
|
||||
template<int StoreMode, int LoadMode, typename PacketType>
|
||||
void assignPacket(Index row, Index col)
|
||||
{
|
||||
PacketType tmp = m_src.template packet<LoadMode,PacketType>(row,col);
|
||||
const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(row,col, m_dst.template packet<StoreMode,PacketType>(row,col));
|
||||
m_dst.template writePacket<StoreMode>(row,col,tmp);
|
||||
}
|
||||
|
||||
template<int StoreMode, int LoadMode, typename PacketType>
|
||||
void assignPacket(Index index)
|
||||
{
|
||||
PacketType tmp = m_src.template packet<LoadMode,PacketType>(index);
|
||||
const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(index, m_dst.template packet<StoreMode,PacketType>(index));
|
||||
m_dst.template writePacket<StoreMode>(index,tmp);
|
||||
}
|
||||
|
||||
// TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I mean no CRTP (Gael)
|
||||
template<int StoreMode, int LoadMode, typename PacketType>
|
||||
void assignPacketByOuterInner(Index outer, Index inner)
|
||||
{
|
||||
Index row = Base::rowIndexByOuterInner(outer, inner);
|
||||
Index col = Base::colIndexByOuterInner(outer, inner);
|
||||
assignPacket<StoreMode,LoadMode,PacketType>(row, col);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_SWAP_H
|
||||
403
external/include/eigen3/Eigen/src/Core/Transpose.h
vendored
Normal file
403
external/include/eigen3/Eigen/src/Core/Transpose.h
vendored
Normal file
@@ -0,0 +1,403 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2009-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_TRANSPOSE_H
|
||||
#define EIGEN_TRANSPOSE_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
template<typename MatrixType>
|
||||
struct traits<Transpose<MatrixType> > : public traits<MatrixType>
|
||||
{
|
||||
typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
|
||||
typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedPlain;
|
||||
enum {
|
||||
RowsAtCompileTime = MatrixType::ColsAtCompileTime,
|
||||
ColsAtCompileTime = MatrixType::RowsAtCompileTime,
|
||||
MaxRowsAtCompileTime = MatrixType::MaxColsAtCompileTime,
|
||||
MaxColsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
|
||||
FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
|
||||
Flags0 = traits<MatrixTypeNestedPlain>::Flags & ~(LvalueBit | NestByRefBit),
|
||||
Flags1 = Flags0 | FlagsLvalueBit,
|
||||
Flags = Flags1 ^ RowMajorBit,
|
||||
InnerStrideAtCompileTime = inner_stride_at_compile_time<MatrixType>::ret,
|
||||
OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
template<typename MatrixType, typename StorageKind> class TransposeImpl;
|
||||
|
||||
/** \class Transpose
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of the transpose of a matrix
|
||||
*
|
||||
* \tparam MatrixType the type of the object of which we are taking the transpose
|
||||
*
|
||||
* This class represents an expression of the transpose of a matrix.
|
||||
* It is the return type of MatrixBase::transpose() and MatrixBase::adjoint()
|
||||
* and most of the time this is the only way it is used.
|
||||
*
|
||||
* \sa MatrixBase::transpose(), MatrixBase::adjoint()
|
||||
*/
|
||||
template<typename MatrixType> class Transpose
|
||||
: public TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>
|
||||
{
|
||||
public:
|
||||
|
||||
typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;
|
||||
|
||||
typedef typename TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
|
||||
EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose)
|
||||
typedef typename internal::remove_all<MatrixType>::type NestedExpression;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit inline Transpose(MatrixType& matrix) : m_matrix(matrix) {}
|
||||
|
||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Transpose)
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.cols(); }
|
||||
EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.rows(); }
|
||||
|
||||
/** \returns the nested expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<MatrixTypeNested>::type&
|
||||
nestedExpression() const { return m_matrix; }
|
||||
|
||||
/** \returns the nested expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename internal::remove_reference<MatrixTypeNested>::type&
|
||||
nestedExpression() { return m_matrix; }
|
||||
|
||||
/** \internal */
|
||||
void resize(Index nrows, Index ncols) {
|
||||
m_matrix.resize(ncols,nrows);
|
||||
}
|
||||
|
||||
protected:
|
||||
typename internal::ref_selector<MatrixType>::non_const_type m_matrix;
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename MatrixType, bool HasDirectAccess = has_direct_access<MatrixType>::ret>
|
||||
struct TransposeImpl_base
|
||||
{
|
||||
typedef typename dense_xpr_base<Transpose<MatrixType> >::type type;
|
||||
};
|
||||
|
||||
template<typename MatrixType>
|
||||
struct TransposeImpl_base<MatrixType, false>
|
||||
{
|
||||
typedef typename dense_xpr_base<Transpose<MatrixType> >::type type;
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
// Generic API dispatcher
|
||||
template<typename XprType, typename StorageKind>
|
||||
class TransposeImpl
|
||||
: public internal::generic_xpr_base<Transpose<XprType> >::type
|
||||
{
|
||||
public:
|
||||
typedef typename internal::generic_xpr_base<Transpose<XprType> >::type Base;
|
||||
};
|
||||
|
||||
template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
|
||||
: public internal::TransposeImpl_base<MatrixType>::type
|
||||
{
|
||||
public:
|
||||
|
||||
typedef typename internal::TransposeImpl_base<MatrixType>::type Base;
|
||||
using Base::coeffRef;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(Transpose<MatrixType>)
|
||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(TransposeImpl)
|
||||
|
||||
EIGEN_DEVICE_FUNC inline Index innerStride() const { return derived().nestedExpression().innerStride(); }
|
||||
EIGEN_DEVICE_FUNC inline Index outerStride() const { return derived().nestedExpression().outerStride(); }
|
||||
|
||||
typedef typename internal::conditional<
|
||||
internal::is_lvalue<MatrixType>::value,
|
||||
Scalar,
|
||||
const Scalar
|
||||
>::type ScalarWithConstIfNotLvalue;
|
||||
|
||||
EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return derived().nestedExpression().data(); }
|
||||
EIGEN_DEVICE_FUNC inline const Scalar* data() const { return derived().nestedExpression().data(); }
|
||||
|
||||
// FIXME: shall we keep the const version of coeffRef?
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index rowId, Index colId) const
|
||||
{
|
||||
return derived().nestedExpression().coeffRef(colId, rowId);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Scalar& coeffRef(Index index) const
|
||||
{
|
||||
return derived().nestedExpression().coeffRef(index);
|
||||
}
|
||||
};
|
||||
|
||||
/** \returns an expression of the transpose of *this.
|
||||
*
|
||||
* Example: \include MatrixBase_transpose.cpp
|
||||
* Output: \verbinclude MatrixBase_transpose.out
|
||||
*
|
||||
* \warning If you want to replace a matrix by its own transpose, do \b NOT do this:
|
||||
* \code
|
||||
* m = m.transpose(); // bug!!! caused by aliasing effect
|
||||
* \endcode
|
||||
* Instead, use the transposeInPlace() method:
|
||||
* \code
|
||||
* m.transposeInPlace();
|
||||
* \endcode
|
||||
* which gives Eigen good opportunities for optimization, or alternatively you can also do:
|
||||
* \code
|
||||
* m = m.transpose().eval();
|
||||
* \endcode
|
||||
*
|
||||
* \sa transposeInPlace(), adjoint() */
|
||||
template<typename Derived>
|
||||
inline Transpose<Derived>
|
||||
DenseBase<Derived>::transpose()
|
||||
{
|
||||
return TransposeReturnType(derived());
|
||||
}
|
||||
|
||||
/** This is the const version of transpose().
|
||||
*
|
||||
* Make sure you read the warning for transpose() !
|
||||
*
|
||||
* \sa transposeInPlace(), adjoint() */
|
||||
template<typename Derived>
|
||||
inline typename DenseBase<Derived>::ConstTransposeReturnType
|
||||
DenseBase<Derived>::transpose() const
|
||||
{
|
||||
return ConstTransposeReturnType(derived());
|
||||
}
|
||||
|
||||
/** \returns an expression of the adjoint (i.e. conjugate transpose) of *this.
|
||||
*
|
||||
* Example: \include MatrixBase_adjoint.cpp
|
||||
* Output: \verbinclude MatrixBase_adjoint.out
|
||||
*
|
||||
* \warning If you want to replace a matrix by its own adjoint, do \b NOT do this:
|
||||
* \code
|
||||
* m = m.adjoint(); // bug!!! caused by aliasing effect
|
||||
* \endcode
|
||||
* Instead, use the adjointInPlace() method:
|
||||
* \code
|
||||
* m.adjointInPlace();
|
||||
* \endcode
|
||||
* which gives Eigen good opportunities for optimization, or alternatively you can also do:
|
||||
* \code
|
||||
* m = m.adjoint().eval();
|
||||
* \endcode
|
||||
*
|
||||
* \sa adjointInPlace(), transpose(), conjugate(), class Transpose, class internal::scalar_conjugate_op */
|
||||
template<typename Derived>
|
||||
inline const typename MatrixBase<Derived>::AdjointReturnType
|
||||
MatrixBase<Derived>::adjoint() const
|
||||
{
|
||||
return AdjointReturnType(this->transpose());
|
||||
}
|
||||
|
||||
/***************************************************************************
|
||||
* "in place" transpose implementation
|
||||
***************************************************************************/
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename MatrixType,
|
||||
bool IsSquare = (MatrixType::RowsAtCompileTime == MatrixType::ColsAtCompileTime) && MatrixType::RowsAtCompileTime!=Dynamic,
|
||||
bool MatchPacketSize =
|
||||
(int(MatrixType::RowsAtCompileTime) == int(internal::packet_traits<typename MatrixType::Scalar>::size))
|
||||
&& (internal::evaluator<MatrixType>::Flags&PacketAccessBit) >
|
||||
struct inplace_transpose_selector;
|
||||
|
||||
template<typename MatrixType>
|
||||
struct inplace_transpose_selector<MatrixType,true,false> { // square matrix
|
||||
static void run(MatrixType& m) {
|
||||
m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose());
|
||||
}
|
||||
};
|
||||
|
||||
// TODO: vectorized path is currently limited to LargestPacketSize x LargestPacketSize cases only.
|
||||
template<typename MatrixType>
|
||||
struct inplace_transpose_selector<MatrixType,true,true> { // PacketSize x PacketSize
|
||||
static void run(MatrixType& m) {
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef typename internal::packet_traits<typename MatrixType::Scalar>::type Packet;
|
||||
const Index PacketSize = internal::packet_traits<Scalar>::size;
|
||||
const Index Alignment = internal::evaluator<MatrixType>::Alignment;
|
||||
PacketBlock<Packet> A;
|
||||
for (Index i=0; i<PacketSize; ++i)
|
||||
A.packet[i] = m.template packetByOuterInner<Alignment>(i,0);
|
||||
internal::ptranspose(A);
|
||||
for (Index i=0; i<PacketSize; ++i)
|
||||
m.template writePacket<Alignment>(m.rowIndexByOuterInner(i,0), m.colIndexByOuterInner(i,0), A.packet[i]);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename MatrixType,bool MatchPacketSize>
|
||||
struct inplace_transpose_selector<MatrixType,false,MatchPacketSize> { // non square matrix
|
||||
static void run(MatrixType& m) {
|
||||
if (m.rows()==m.cols())
|
||||
m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose());
|
||||
else
|
||||
m = m.transpose().eval();
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** This is the "in place" version of transpose(): it replaces \c *this by its own transpose.
|
||||
* Thus, doing
|
||||
* \code
|
||||
* m.transposeInPlace();
|
||||
* \endcode
|
||||
* has the same effect on m as doing
|
||||
* \code
|
||||
* m = m.transpose().eval();
|
||||
* \endcode
|
||||
* and is faster and also safer because in the latter line of code, forgetting the eval() results
|
||||
* in a bug caused by \ref TopicAliasing "aliasing".
|
||||
*
|
||||
* Notice however that this method is only useful if you want to replace a matrix by its own transpose.
|
||||
* If you just need the transpose of a matrix, use transpose().
|
||||
*
|
||||
* \note if the matrix is not square, then \c *this must be a resizable matrix.
|
||||
* This excludes (non-square) fixed-size matrices, block-expressions and maps.
|
||||
*
|
||||
* \sa transpose(), adjoint(), adjointInPlace() */
|
||||
template<typename Derived>
|
||||
inline void DenseBase<Derived>::transposeInPlace()
|
||||
{
|
||||
eigen_assert((rows() == cols() || (RowsAtCompileTime == Dynamic && ColsAtCompileTime == Dynamic))
|
||||
&& "transposeInPlace() called on a non-square non-resizable matrix");
|
||||
internal::inplace_transpose_selector<Derived>::run(derived());
|
||||
}
|
||||
|
||||
/***************************************************************************
|
||||
* "in place" adjoint implementation
|
||||
***************************************************************************/
|
||||
|
||||
/** This is the "in place" version of adjoint(): it replaces \c *this by its own transpose.
|
||||
* Thus, doing
|
||||
* \code
|
||||
* m.adjointInPlace();
|
||||
* \endcode
|
||||
* has the same effect on m as doing
|
||||
* \code
|
||||
* m = m.adjoint().eval();
|
||||
* \endcode
|
||||
* and is faster and also safer because in the latter line of code, forgetting the eval() results
|
||||
* in a bug caused by aliasing.
|
||||
*
|
||||
* Notice however that this method is only useful if you want to replace a matrix by its own adjoint.
|
||||
* If you just need the adjoint of a matrix, use adjoint().
|
||||
*
|
||||
* \note if the matrix is not square, then \c *this must be a resizable matrix.
|
||||
* This excludes (non-square) fixed-size matrices, block-expressions and maps.
|
||||
*
|
||||
* \sa transpose(), adjoint(), transposeInPlace() */
|
||||
template<typename Derived>
|
||||
inline void MatrixBase<Derived>::adjointInPlace()
|
||||
{
|
||||
derived() = adjoint().eval();
|
||||
}
|
||||
|
||||
#ifndef EIGEN_NO_DEBUG
|
||||
|
||||
// The following is to detect aliasing problems in most common cases.
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<bool DestIsTransposed, typename OtherDerived>
|
||||
struct check_transpose_aliasing_compile_time_selector
|
||||
{
|
||||
enum { ret = bool(blas_traits<OtherDerived>::IsTransposed) != DestIsTransposed };
|
||||
};
|
||||
|
||||
template<bool DestIsTransposed, typename BinOp, typename DerivedA, typename DerivedB>
|
||||
struct check_transpose_aliasing_compile_time_selector<DestIsTransposed,CwiseBinaryOp<BinOp,DerivedA,DerivedB> >
|
||||
{
|
||||
enum { ret = bool(blas_traits<DerivedA>::IsTransposed) != DestIsTransposed
|
||||
|| bool(blas_traits<DerivedB>::IsTransposed) != DestIsTransposed
|
||||
};
|
||||
};
|
||||
|
||||
template<typename Scalar, bool DestIsTransposed, typename OtherDerived>
|
||||
struct check_transpose_aliasing_run_time_selector
|
||||
{
|
||||
static bool run(const Scalar* dest, const OtherDerived& src)
|
||||
{
|
||||
return (bool(blas_traits<OtherDerived>::IsTransposed) != DestIsTransposed) && (dest!=0 && dest==(const Scalar*)extract_data(src));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Scalar, bool DestIsTransposed, typename BinOp, typename DerivedA, typename DerivedB>
|
||||
struct check_transpose_aliasing_run_time_selector<Scalar,DestIsTransposed,CwiseBinaryOp<BinOp,DerivedA,DerivedB> >
|
||||
{
|
||||
static bool run(const Scalar* dest, const CwiseBinaryOp<BinOp,DerivedA,DerivedB>& src)
|
||||
{
|
||||
return ((blas_traits<DerivedA>::IsTransposed != DestIsTransposed) && (dest!=0 && dest==(const Scalar*)extract_data(src.lhs())))
|
||||
|| ((blas_traits<DerivedB>::IsTransposed != DestIsTransposed) && (dest!=0 && dest==(const Scalar*)extract_data(src.rhs())));
|
||||
}
|
||||
};
|
||||
|
||||
// the following selector, checkTransposeAliasing_impl, based on MightHaveTransposeAliasing,
|
||||
// is because when the condition controlling the assert is known at compile time, ICC emits a warning.
|
||||
// This is actually a good warning: in expressions that don't have any transposing, the condition is
|
||||
// known at compile time to be false, and using that, we can avoid generating the code of the assert again
|
||||
// and again for all these expressions that don't need it.
|
||||
|
||||
template<typename Derived, typename OtherDerived,
|
||||
bool MightHaveTransposeAliasing
|
||||
= check_transpose_aliasing_compile_time_selector
|
||||
<blas_traits<Derived>::IsTransposed,OtherDerived>::ret
|
||||
>
|
||||
struct checkTransposeAliasing_impl
|
||||
{
|
||||
static void run(const Derived& dst, const OtherDerived& other)
|
||||
{
|
||||
eigen_assert((!check_transpose_aliasing_run_time_selector
|
||||
<typename Derived::Scalar,blas_traits<Derived>::IsTransposed,OtherDerived>
|
||||
::run(extract_data(dst), other))
|
||||
&& "aliasing detected during transposition, use transposeInPlace() "
|
||||
"or evaluate the rhs into a temporary using .eval()");
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Derived, typename OtherDerived>
|
||||
struct checkTransposeAliasing_impl<Derived, OtherDerived, false>
|
||||
{
|
||||
static void run(const Derived&, const OtherDerived&)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Dst, typename Src>
|
||||
void check_for_aliasing(const Dst &dst, const Src &src)
|
||||
{
|
||||
internal::checkTransposeAliasing_impl<Dst, Src>::run(dst, src);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
#endif // EIGEN_NO_DEBUG
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_TRANSPOSE_H
|
||||
407
external/include/eigen3/Eigen/src/Core/Transpositions.h
vendored
Normal file
407
external/include/eigen3/Eigen/src/Core/Transpositions.h
vendored
Normal file
@@ -0,0 +1,407 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2010-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_TRANSPOSITIONS_H
|
||||
#define EIGEN_TRANSPOSITIONS_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
template<typename Derived>
|
||||
class TranspositionsBase
|
||||
{
|
||||
typedef internal::traits<Derived> Traits;
|
||||
|
||||
public:
|
||||
|
||||
typedef typename Traits::IndicesType IndicesType;
|
||||
typedef typename IndicesType::Scalar StorageIndex;
|
||||
typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
|
||||
|
||||
Derived& derived() { return *static_cast<Derived*>(this); }
|
||||
const Derived& derived() const { return *static_cast<const Derived*>(this); }
|
||||
|
||||
/** Copies the \a other transpositions into \c *this */
|
||||
template<typename OtherDerived>
|
||||
Derived& operator=(const TranspositionsBase<OtherDerived>& other)
|
||||
{
|
||||
indices() = other.indices();
|
||||
return derived();
|
||||
}
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** This is a special case of the templated operator=. Its purpose is to
|
||||
* prevent a default operator= from hiding the templated operator=.
|
||||
*/
|
||||
Derived& operator=(const TranspositionsBase& other)
|
||||
{
|
||||
indices() = other.indices();
|
||||
return derived();
|
||||
}
|
||||
#endif
|
||||
|
||||
/** \returns the number of transpositions */
|
||||
Index size() const { return indices().size(); }
|
||||
/** \returns the number of rows of the equivalent permutation matrix */
|
||||
Index rows() const { return indices().size(); }
|
||||
/** \returns the number of columns of the equivalent permutation matrix */
|
||||
Index cols() const { return indices().size(); }
|
||||
|
||||
/** Direct access to the underlying index vector */
|
||||
inline const StorageIndex& coeff(Index i) const { return indices().coeff(i); }
|
||||
/** Direct access to the underlying index vector */
|
||||
inline StorageIndex& coeffRef(Index i) { return indices().coeffRef(i); }
|
||||
/** Direct access to the underlying index vector */
|
||||
inline const StorageIndex& operator()(Index i) const { return indices()(i); }
|
||||
/** Direct access to the underlying index vector */
|
||||
inline StorageIndex& operator()(Index i) { return indices()(i); }
|
||||
/** Direct access to the underlying index vector */
|
||||
inline const StorageIndex& operator[](Index i) const { return indices()(i); }
|
||||
/** Direct access to the underlying index vector */
|
||||
inline StorageIndex& operator[](Index i) { return indices()(i); }
|
||||
|
||||
/** const version of indices(). */
|
||||
const IndicesType& indices() const { return derived().indices(); }
|
||||
/** \returns a reference to the stored array representing the transpositions. */
|
||||
IndicesType& indices() { return derived().indices(); }
|
||||
|
||||
/** Resizes to given size. */
|
||||
inline void resize(Index newSize)
|
||||
{
|
||||
indices().resize(newSize);
|
||||
}
|
||||
|
||||
/** Sets \c *this to represents an identity transformation */
|
||||
void setIdentity()
|
||||
{
|
||||
for(StorageIndex i = 0; i < indices().size(); ++i)
|
||||
coeffRef(i) = i;
|
||||
}
|
||||
|
||||
// FIXME: do we want such methods ?
|
||||
// might be usefull when the target matrix expression is complex, e.g.:
|
||||
// object.matrix().block(..,..,..,..) = trans * object.matrix().block(..,..,..,..);
|
||||
/*
|
||||
template<typename MatrixType>
|
||||
void applyForwardToRows(MatrixType& mat) const
|
||||
{
|
||||
for(Index k=0 ; k<size() ; ++k)
|
||||
if(m_indices(k)!=k)
|
||||
mat.row(k).swap(mat.row(m_indices(k)));
|
||||
}
|
||||
|
||||
template<typename MatrixType>
|
||||
void applyBackwardToRows(MatrixType& mat) const
|
||||
{
|
||||
for(Index k=size()-1 ; k>=0 ; --k)
|
||||
if(m_indices(k)!=k)
|
||||
mat.row(k).swap(mat.row(m_indices(k)));
|
||||
}
|
||||
*/
|
||||
|
||||
/** \returns the inverse transformation */
|
||||
inline Transpose<TranspositionsBase> inverse() const
|
||||
{ return Transpose<TranspositionsBase>(derived()); }
|
||||
|
||||
/** \returns the tranpose transformation */
|
||||
inline Transpose<TranspositionsBase> transpose() const
|
||||
{ return Transpose<TranspositionsBase>(derived()); }
|
||||
|
||||
protected:
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
|
||||
struct traits<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >
|
||||
: traits<PermutationMatrix<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >
|
||||
{
|
||||
typedef Matrix<_StorageIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
|
||||
typedef TranspositionsStorage StorageKind;
|
||||
};
|
||||
}
|
||||
|
||||
/** \class Transpositions
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Represents a sequence of transpositions (row/column interchange)
|
||||
*
|
||||
* \tparam SizeAtCompileTime the number of transpositions, or Dynamic
|
||||
* \tparam MaxSizeAtCompileTime the maximum number of transpositions, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
|
||||
*
|
||||
* This class represents a permutation transformation as a sequence of \em n transpositions
|
||||
* \f$[T_{n-1} \ldots T_{i} \ldots T_{0}]\f$. It is internally stored as a vector of integers \c indices.
|
||||
* Each transposition \f$ T_{i} \f$ applied on the left of a matrix (\f$ T_{i} M\f$) interchanges
|
||||
* the rows \c i and \c indices[i] of the matrix \c M.
|
||||
* A transposition applied on the right (e.g., \f$ M T_{i}\f$) yields a column interchange.
|
||||
*
|
||||
* Compared to the class PermutationMatrix, such a sequence of transpositions is what is
|
||||
* computed during a decomposition with pivoting, and it is faster when applying the permutation in-place.
|
||||
*
|
||||
* To apply a sequence of transpositions to a matrix, simply use the operator * as in the following example:
|
||||
* \code
|
||||
* Transpositions tr;
|
||||
* MatrixXf mat;
|
||||
* mat = tr * mat;
|
||||
* \endcode
|
||||
* In this example, we detect that the matrix appears on both side, and so the transpositions
|
||||
* are applied in-place without any temporary or extra copy.
|
||||
*
|
||||
* \sa class PermutationMatrix
|
||||
*/
|
||||
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
|
||||
class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >
|
||||
{
|
||||
typedef internal::traits<Transpositions> Traits;
|
||||
public:
|
||||
|
||||
typedef TranspositionsBase<Transpositions> Base;
|
||||
typedef typename Traits::IndicesType IndicesType;
|
||||
typedef typename IndicesType::Scalar StorageIndex;
|
||||
|
||||
inline Transpositions() {}
|
||||
|
||||
/** Copy constructor. */
|
||||
template<typename OtherDerived>
|
||||
inline Transpositions(const TranspositionsBase<OtherDerived>& other)
|
||||
: m_indices(other.indices()) {}
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** Standard copy constructor. Defined only to prevent a default copy constructor
|
||||
* from hiding the other templated constructor */
|
||||
inline Transpositions(const Transpositions& other) : m_indices(other.indices()) {}
|
||||
#endif
|
||||
|
||||
/** Generic constructor from expression of the transposition indices. */
|
||||
template<typename Other>
|
||||
explicit inline Transpositions(const MatrixBase<Other>& indices) : m_indices(indices)
|
||||
{}
|
||||
|
||||
/** Copies the \a other transpositions into \c *this */
|
||||
template<typename OtherDerived>
|
||||
Transpositions& operator=(const TranspositionsBase<OtherDerived>& other)
|
||||
{
|
||||
return Base::operator=(other);
|
||||
}
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** This is a special case of the templated operator=. Its purpose is to
|
||||
* prevent a default operator= from hiding the templated operator=.
|
||||
*/
|
||||
Transpositions& operator=(const Transpositions& other)
|
||||
{
|
||||
m_indices = other.m_indices;
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
|
||||
/** Constructs an uninitialized permutation matrix of given size.
|
||||
*/
|
||||
inline Transpositions(Index size) : m_indices(size)
|
||||
{}
|
||||
|
||||
/** const version of indices(). */
|
||||
const IndicesType& indices() const { return m_indices; }
|
||||
/** \returns a reference to the stored array representing the transpositions. */
|
||||
IndicesType& indices() { return m_indices; }
|
||||
|
||||
protected:
|
||||
|
||||
IndicesType m_indices;
|
||||
};
|
||||
|
||||
|
||||
namespace internal {
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex, int _PacketAccess>
|
||||
struct traits<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex>,_PacketAccess> >
|
||||
: traits<PermutationMatrix<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >
|
||||
{
|
||||
typedef Map<const Matrix<_StorageIndex,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1>, _PacketAccess> IndicesType;
|
||||
typedef _StorageIndex StorageIndex;
|
||||
typedef TranspositionsStorage StorageKind;
|
||||
};
|
||||
}
|
||||
|
||||
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex, int PacketAccess>
|
||||
class Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex>,PacketAccess>
|
||||
: public TranspositionsBase<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex>,PacketAccess> >
|
||||
{
|
||||
typedef internal::traits<Map> Traits;
|
||||
public:
|
||||
|
||||
typedef TranspositionsBase<Map> Base;
|
||||
typedef typename Traits::IndicesType IndicesType;
|
||||
typedef typename IndicesType::Scalar StorageIndex;
|
||||
|
||||
explicit inline Map(const StorageIndex* indicesPtr)
|
||||
: m_indices(indicesPtr)
|
||||
{}
|
||||
|
||||
inline Map(const StorageIndex* indicesPtr, Index size)
|
||||
: m_indices(indicesPtr,size)
|
||||
{}
|
||||
|
||||
/** Copies the \a other transpositions into \c *this */
|
||||
template<typename OtherDerived>
|
||||
Map& operator=(const TranspositionsBase<OtherDerived>& other)
|
||||
{
|
||||
return Base::operator=(other);
|
||||
}
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** This is a special case of the templated operator=. Its purpose is to
|
||||
* prevent a default operator= from hiding the templated operator=.
|
||||
*/
|
||||
Map& operator=(const Map& other)
|
||||
{
|
||||
m_indices = other.m_indices;
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
|
||||
/** const version of indices(). */
|
||||
const IndicesType& indices() const { return m_indices; }
|
||||
|
||||
/** \returns a reference to the stored array representing the transpositions. */
|
||||
IndicesType& indices() { return m_indices; }
|
||||
|
||||
protected:
|
||||
|
||||
IndicesType m_indices;
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
template<typename _IndicesType>
|
||||
struct traits<TranspositionsWrapper<_IndicesType> >
|
||||
: traits<PermutationWrapper<_IndicesType> >
|
||||
{
|
||||
typedef TranspositionsStorage StorageKind;
|
||||
};
|
||||
}
|
||||
|
||||
template<typename _IndicesType>
|
||||
class TranspositionsWrapper
|
||||
: public TranspositionsBase<TranspositionsWrapper<_IndicesType> >
|
||||
{
|
||||
typedef internal::traits<TranspositionsWrapper> Traits;
|
||||
public:
|
||||
|
||||
typedef TranspositionsBase<TranspositionsWrapper> Base;
|
||||
typedef typename Traits::IndicesType IndicesType;
|
||||
typedef typename IndicesType::Scalar StorageIndex;
|
||||
|
||||
explicit inline TranspositionsWrapper(IndicesType& indices)
|
||||
: m_indices(indices)
|
||||
{}
|
||||
|
||||
/** Copies the \a other transpositions into \c *this */
|
||||
template<typename OtherDerived>
|
||||
TranspositionsWrapper& operator=(const TranspositionsBase<OtherDerived>& other)
|
||||
{
|
||||
return Base::operator=(other);
|
||||
}
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
/** This is a special case of the templated operator=. Its purpose is to
|
||||
* prevent a default operator= from hiding the templated operator=.
|
||||
*/
|
||||
TranspositionsWrapper& operator=(const TranspositionsWrapper& other)
|
||||
{
|
||||
m_indices = other.m_indices;
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
|
||||
/** const version of indices(). */
|
||||
const IndicesType& indices() const { return m_indices; }
|
||||
|
||||
/** \returns a reference to the stored array representing the transpositions. */
|
||||
IndicesType& indices() { return m_indices; }
|
||||
|
||||
protected:
|
||||
|
||||
typename IndicesType::Nested m_indices;
|
||||
};
|
||||
|
||||
|
||||
|
||||
/** \returns the \a matrix with the \a transpositions applied to the columns.
|
||||
*/
|
||||
template<typename MatrixDerived, typename TranspositionsDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Product<MatrixDerived, TranspositionsDerived, AliasFreeProduct>
|
||||
operator*(const MatrixBase<MatrixDerived> &matrix,
|
||||
const TranspositionsBase<TranspositionsDerived>& transpositions)
|
||||
{
|
||||
return Product<MatrixDerived, TranspositionsDerived, AliasFreeProduct>
|
||||
(matrix.derived(), transpositions.derived());
|
||||
}
|
||||
|
||||
/** \returns the \a matrix with the \a transpositions applied to the rows.
|
||||
*/
|
||||
template<typename TranspositionsDerived, typename MatrixDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Product<TranspositionsDerived, MatrixDerived, AliasFreeProduct>
|
||||
operator*(const TranspositionsBase<TranspositionsDerived> &transpositions,
|
||||
const MatrixBase<MatrixDerived>& matrix)
|
||||
{
|
||||
return Product<TranspositionsDerived, MatrixDerived, AliasFreeProduct>
|
||||
(transpositions.derived(), matrix.derived());
|
||||
}
|
||||
|
||||
// Template partial specialization for transposed/inverse transpositions
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Derived>
|
||||
struct traits<Transpose<TranspositionsBase<Derived> > >
|
||||
: traits<Derived>
|
||||
{};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
template<typename TranspositionsDerived>
|
||||
class Transpose<TranspositionsBase<TranspositionsDerived> >
|
||||
{
|
||||
typedef TranspositionsDerived TranspositionType;
|
||||
typedef typename TranspositionType::IndicesType IndicesType;
|
||||
public:
|
||||
|
||||
explicit Transpose(const TranspositionType& t) : m_transpositions(t) {}
|
||||
|
||||
Index size() const { return m_transpositions.size(); }
|
||||
Index rows() const { return m_transpositions.size(); }
|
||||
Index cols() const { return m_transpositions.size(); }
|
||||
|
||||
/** \returns the \a matrix with the inverse transpositions applied to the columns.
|
||||
*/
|
||||
template<typename OtherDerived> friend
|
||||
const Product<OtherDerived, Transpose, AliasFreeProduct>
|
||||
operator*(const MatrixBase<OtherDerived>& matrix, const Transpose& trt)
|
||||
{
|
||||
return Product<OtherDerived, Transpose, AliasFreeProduct>(matrix.derived(), trt);
|
||||
}
|
||||
|
||||
/** \returns the \a matrix with the inverse transpositions applied to the rows.
|
||||
*/
|
||||
template<typename OtherDerived>
|
||||
const Product<Transpose, OtherDerived, AliasFreeProduct>
|
||||
operator*(const MatrixBase<OtherDerived>& matrix) const
|
||||
{
|
||||
return Product<Transpose, OtherDerived, AliasFreeProduct>(*this, matrix.derived());
|
||||
}
|
||||
|
||||
const TranspositionType& nestedExpression() const { return m_transpositions; }
|
||||
|
||||
protected:
|
||||
const TranspositionType& m_transpositions;
|
||||
};
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_TRANSPOSITIONS_H
|
||||
983
external/include/eigen3/Eigen/src/Core/TriangularMatrix.h
vendored
Normal file
983
external/include/eigen3/Eigen/src/Core/TriangularMatrix.h
vendored
Normal file
@@ -0,0 +1,983 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_TRIANGULARMATRIX_H
|
||||
#define EIGEN_TRIANGULARMATRIX_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<int Side, typename TriangularType, typename Rhs> struct triangular_solve_retval;
|
||||
|
||||
}
|
||||
|
||||
/** \class TriangularBase
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Base class for triangular part in a matrix
|
||||
*/
|
||||
template<typename Derived> class TriangularBase : public EigenBase<Derived>
|
||||
{
|
||||
public:
|
||||
|
||||
enum {
|
||||
Mode = internal::traits<Derived>::Mode,
|
||||
RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
|
||||
ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
|
||||
MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,
|
||||
|
||||
SizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::RowsAtCompileTime,
|
||||
internal::traits<Derived>::ColsAtCompileTime>::ret),
|
||||
/**< This is equal to the number of coefficients, i.e. the number of
|
||||
* rows times the number of columns, or to \a Dynamic if this is not
|
||||
* known at compile-time. \sa RowsAtCompileTime, ColsAtCompileTime */
|
||||
|
||||
MaxSizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::MaxRowsAtCompileTime,
|
||||
internal::traits<Derived>::MaxColsAtCompileTime>::ret)
|
||||
|
||||
};
|
||||
typedef typename internal::traits<Derived>::Scalar Scalar;
|
||||
typedef typename internal::traits<Derived>::StorageKind StorageKind;
|
||||
typedef typename internal::traits<Derived>::StorageIndex StorageIndex;
|
||||
typedef typename internal::traits<Derived>::FullMatrixType DenseMatrixType;
|
||||
typedef DenseMatrixType DenseType;
|
||||
typedef Derived const& Nested;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline TriangularBase() { eigen_assert(!((Mode&UnitDiag) && (Mode&ZeroDiag))); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rows() const { return derived().rows(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index cols() const { return derived().cols(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const { return derived().outerStride(); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const { return derived().innerStride(); }
|
||||
|
||||
// dummy resize function
|
||||
void resize(Index rows, Index cols)
|
||||
{
|
||||
EIGEN_UNUSED_VARIABLE(rows);
|
||||
EIGEN_UNUSED_VARIABLE(cols);
|
||||
eigen_assert(rows==this->rows() && cols==this->cols());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar coeff(Index row, Index col) const { return derived().coeff(row,col); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index row, Index col) { return derived().coeffRef(row,col); }
|
||||
|
||||
/** \see MatrixBase::copyCoeff(row,col)
|
||||
*/
|
||||
template<typename Other>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void copyCoeff(Index row, Index col, Other& other)
|
||||
{
|
||||
derived().coeffRef(row, col) = other.coeff(row, col);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar operator()(Index row, Index col) const
|
||||
{
|
||||
check_coordinates(row, col);
|
||||
return coeff(row,col);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& operator()(Index row, Index col)
|
||||
{
|
||||
check_coordinates(row, col);
|
||||
return coeffRef(row,col);
|
||||
}
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Derived& derived() { return *static_cast<Derived*>(this); }
|
||||
#endif // not EIGEN_PARSED_BY_DOXYGEN
|
||||
|
||||
template<typename DenseDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void evalTo(MatrixBase<DenseDerived> &other) const;
|
||||
template<typename DenseDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void evalToLazy(MatrixBase<DenseDerived> &other) const;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseMatrixType toDenseMatrix() const
|
||||
{
|
||||
DenseMatrixType res(rows(), cols());
|
||||
evalToLazy(res);
|
||||
return res;
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
void check_coordinates(Index row, Index col) const
|
||||
{
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(row);
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(col);
|
||||
eigen_assert(col>=0 && col<cols() && row>=0 && row<rows());
|
||||
const int mode = int(Mode) & ~SelfAdjoint;
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(mode);
|
||||
eigen_assert((mode==Upper && col>=row)
|
||||
|| (mode==Lower && col<=row)
|
||||
|| ((mode==StrictlyUpper || mode==UnitUpper) && col>row)
|
||||
|| ((mode==StrictlyLower || mode==UnitLower) && col<row));
|
||||
}
|
||||
|
||||
#ifdef EIGEN_INTERNAL_DEBUGGING
|
||||
void check_coordinates_internal(Index row, Index col) const
|
||||
{
|
||||
check_coordinates(row, col);
|
||||
}
|
||||
#else
|
||||
void check_coordinates_internal(Index , Index ) const {}
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
/** \class TriangularView
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of a triangular part in a matrix
|
||||
*
|
||||
* \param MatrixType the type of the object in which we are taking the triangular part
|
||||
* \param Mode the kind of triangular matrix expression to construct. Can be #Upper,
|
||||
* #Lower, #UnitUpper, #UnitLower, #StrictlyUpper, or #StrictlyLower.
|
||||
* This is in fact a bit field; it must have either #Upper or #Lower,
|
||||
* and additionally it may have #UnitDiag or #ZeroDiag or neither.
|
||||
*
|
||||
* This class represents a triangular part of a matrix, not necessarily square. Strictly speaking, for rectangular
|
||||
* matrices one should speak of "trapezoid" parts. This class is the return type
|
||||
* of MatrixBase::triangularView() and SparseMatrixBase::triangularView(), and most of the time this is the only way it is used.
|
||||
*
|
||||
* \sa MatrixBase::triangularView()
|
||||
*/
|
||||
namespace internal {
|
||||
template<typename MatrixType, unsigned int _Mode>
|
||||
struct traits<TriangularView<MatrixType, _Mode> > : traits<MatrixType>
|
||||
{
|
||||
typedef typename ref_selector<MatrixType>::non_const_type MatrixTypeNested;
|
||||
typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedNonRef;
|
||||
typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
|
||||
typedef typename MatrixType::PlainObject FullMatrixType;
|
||||
typedef MatrixType ExpressionType;
|
||||
enum {
|
||||
Mode = _Mode,
|
||||
FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
|
||||
Flags = (MatrixTypeNestedCleaned::Flags & (HereditaryBits | FlagsLvalueBit) & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit)))
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
template<typename _MatrixType, unsigned int _Mode, typename StorageKind> class TriangularViewImpl;
|
||||
|
||||
template<typename _MatrixType, unsigned int _Mode> class TriangularView
|
||||
: public TriangularViewImpl<_MatrixType, _Mode, typename internal::traits<_MatrixType>::StorageKind >
|
||||
{
|
||||
public:
|
||||
|
||||
typedef TriangularViewImpl<_MatrixType, _Mode, typename internal::traits<_MatrixType>::StorageKind > Base;
|
||||
typedef typename internal::traits<TriangularView>::Scalar Scalar;
|
||||
typedef _MatrixType MatrixType;
|
||||
|
||||
protected:
|
||||
typedef typename internal::traits<TriangularView>::MatrixTypeNested MatrixTypeNested;
|
||||
typedef typename internal::traits<TriangularView>::MatrixTypeNestedNonRef MatrixTypeNestedNonRef;
|
||||
|
||||
typedef typename internal::remove_all<typename MatrixType::ConjugateReturnType>::type MatrixConjugateReturnType;
|
||||
|
||||
public:
|
||||
|
||||
typedef typename internal::traits<TriangularView>::StorageKind StorageKind;
|
||||
typedef typename internal::traits<TriangularView>::MatrixTypeNestedCleaned NestedExpression;
|
||||
|
||||
enum {
|
||||
Mode = _Mode,
|
||||
Flags = internal::traits<TriangularView>::Flags,
|
||||
TransposeMode = (Mode & Upper ? Lower : 0)
|
||||
| (Mode & Lower ? Upper : 0)
|
||||
| (Mode & (UnitDiag))
|
||||
| (Mode & (ZeroDiag)),
|
||||
IsVectorAtCompileTime = false
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit inline TriangularView(MatrixType& matrix) : m_matrix(matrix)
|
||||
{}
|
||||
|
||||
using Base::operator=;
|
||||
TriangularView& operator=(const TriangularView &other)
|
||||
{ return Base::operator=(other); }
|
||||
|
||||
/** \copydoc EigenBase::rows() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rows() const { return m_matrix.rows(); }
|
||||
/** \copydoc EigenBase::cols() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index cols() const { return m_matrix.cols(); }
|
||||
|
||||
/** \returns a const reference to the nested expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const NestedExpression& nestedExpression() const { return m_matrix; }
|
||||
|
||||
/** \returns a reference to the nested expression */
|
||||
EIGEN_DEVICE_FUNC
|
||||
NestedExpression& nestedExpression() { return m_matrix; }
|
||||
|
||||
typedef TriangularView<const MatrixConjugateReturnType,Mode> ConjugateReturnType;
|
||||
/** \sa MatrixBase::conjugate() const */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const ConjugateReturnType conjugate() const
|
||||
{ return ConjugateReturnType(m_matrix.conjugate()); }
|
||||
|
||||
typedef TriangularView<const typename MatrixType::AdjointReturnType,TransposeMode> AdjointReturnType;
|
||||
/** \sa MatrixBase::adjoint() const */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const AdjointReturnType adjoint() const
|
||||
{ return AdjointReturnType(m_matrix.adjoint()); }
|
||||
|
||||
typedef TriangularView<typename MatrixType::TransposeReturnType,TransposeMode> TransposeReturnType;
|
||||
/** \sa MatrixBase::transpose() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline TransposeReturnType transpose()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
|
||||
typename MatrixType::TransposeReturnType tmp(m_matrix);
|
||||
return TransposeReturnType(tmp);
|
||||
}
|
||||
|
||||
typedef TriangularView<const typename MatrixType::ConstTransposeReturnType,TransposeMode> ConstTransposeReturnType;
|
||||
/** \sa MatrixBase::transpose() const */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const ConstTransposeReturnType transpose() const
|
||||
{
|
||||
return ConstTransposeReturnType(m_matrix.transpose());
|
||||
}
|
||||
|
||||
template<typename Other>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const Solve<TriangularView, Other>
|
||||
solve(const MatrixBase<Other>& other) const
|
||||
{ return Solve<TriangularView, Other>(*this, other.derived()); }
|
||||
|
||||
// workaround MSVC ICE
|
||||
#if EIGEN_COMP_MSVC
|
||||
template<int Side, typename Other>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const internal::triangular_solve_retval<Side,TriangularView, Other>
|
||||
solve(const MatrixBase<Other>& other) const
|
||||
{ return Base::template solve<Side>(other); }
|
||||
#else
|
||||
using Base::solve;
|
||||
#endif
|
||||
|
||||
/** \returns a selfadjoint view of the referenced triangular part which must be either \c #Upper or \c #Lower.
|
||||
*
|
||||
* This is a shortcut for \code this->nestedExpression().selfadjointView<(*this)::Mode>() \endcode
|
||||
* \sa MatrixBase::selfadjointView() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView()
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((Mode&(UnitDiag|ZeroDiag))==0,PROGRAMMING_ERROR);
|
||||
return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix);
|
||||
}
|
||||
|
||||
/** This is the const version of selfadjointView() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView() const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT((Mode&(UnitDiag|ZeroDiag))==0,PROGRAMMING_ERROR);
|
||||
return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix);
|
||||
}
|
||||
|
||||
|
||||
/** \returns the determinant of the triangular matrix
|
||||
* \sa MatrixBase::determinant() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
Scalar determinant() const
|
||||
{
|
||||
if (Mode & UnitDiag)
|
||||
return 1;
|
||||
else if (Mode & ZeroDiag)
|
||||
return 0;
|
||||
else
|
||||
return m_matrix.diagonal().prod();
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
MatrixTypeNested m_matrix;
|
||||
};
|
||||
|
||||
/** \ingroup Core_Module
|
||||
*
|
||||
* \brief Base class for a triangular part in a \b dense matrix
|
||||
*
|
||||
* This class is an abstract base class of class TriangularView, and objects of type TriangularViewImpl cannot be instantiated.
|
||||
* It extends class TriangularView with additional methods which available for dense expressions only.
|
||||
*
|
||||
* \sa class TriangularView, MatrixBase::triangularView()
|
||||
*/
|
||||
template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_MatrixType,_Mode,Dense>
|
||||
: public TriangularBase<TriangularView<_MatrixType, _Mode> >
|
||||
{
|
||||
public:
|
||||
|
||||
typedef TriangularView<_MatrixType, _Mode> TriangularViewType;
|
||||
typedef TriangularBase<TriangularViewType> Base;
|
||||
typedef typename internal::traits<TriangularViewType>::Scalar Scalar;
|
||||
|
||||
typedef _MatrixType MatrixType;
|
||||
typedef typename MatrixType::PlainObject DenseMatrixType;
|
||||
typedef DenseMatrixType PlainObject;
|
||||
|
||||
public:
|
||||
using Base::evalToLazy;
|
||||
using Base::derived;
|
||||
|
||||
typedef typename internal::traits<TriangularViewType>::StorageKind StorageKind;
|
||||
|
||||
enum {
|
||||
Mode = _Mode,
|
||||
Flags = internal::traits<TriangularViewType>::Flags
|
||||
};
|
||||
|
||||
/** \returns the outer-stride of the underlying dense matrix
|
||||
* \sa DenseCoeffsBase::outerStride() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index outerStride() const { return derived().nestedExpression().outerStride(); }
|
||||
/** \returns the inner-stride of the underlying dense matrix
|
||||
* \sa DenseCoeffsBase::innerStride() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index innerStride() const { return derived().nestedExpression().innerStride(); }
|
||||
|
||||
/** \sa MatrixBase::operator+=() */
|
||||
template<typename Other>
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularViewType& operator+=(const DenseBase<Other>& other) {
|
||||
internal::call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op<Scalar,typename Other::Scalar>());
|
||||
return derived();
|
||||
}
|
||||
/** \sa MatrixBase::operator-=() */
|
||||
template<typename Other>
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularViewType& operator-=(const DenseBase<Other>& other) {
|
||||
internal::call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op<Scalar,typename Other::Scalar>());
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** \sa MatrixBase::operator*=() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularViewType& operator*=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = derived().nestedExpression() * other; }
|
||||
/** \sa DenseBase::operator/=() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularViewType& operator/=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = derived().nestedExpression() / other; }
|
||||
|
||||
/** \sa MatrixBase::fill() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
void fill(const Scalar& value) { setConstant(value); }
|
||||
/** \sa MatrixBase::setConstant() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularViewType& setConstant(const Scalar& value)
|
||||
{ return *this = MatrixType::Constant(derived().rows(), derived().cols(), value); }
|
||||
/** \sa MatrixBase::setZero() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularViewType& setZero() { return setConstant(Scalar(0)); }
|
||||
/** \sa MatrixBase::setOnes() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularViewType& setOnes() { return setConstant(Scalar(1)); }
|
||||
|
||||
/** \sa MatrixBase::coeff()
|
||||
* \warning the coordinates must fit into the referenced triangular part
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar coeff(Index row, Index col) const
|
||||
{
|
||||
Base::check_coordinates_internal(row, col);
|
||||
return derived().nestedExpression().coeff(row, col);
|
||||
}
|
||||
|
||||
/** \sa MatrixBase::coeffRef()
|
||||
* \warning the coordinates must fit into the referenced triangular part
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Scalar& coeffRef(Index row, Index col)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(TriangularViewType);
|
||||
Base::check_coordinates_internal(row, col);
|
||||
return derived().nestedExpression().coeffRef(row, col);
|
||||
}
|
||||
|
||||
/** Assigns a triangular matrix to a triangular part of a dense matrix */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularViewType& operator=(const TriangularBase<OtherDerived>& other);
|
||||
|
||||
/** Shortcut for\code *this = other.other.triangularView<(*this)::Mode>() \endcode */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularViewType& operator=(const MatrixBase<OtherDerived>& other);
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
EIGEN_DEVICE_FUNC
|
||||
TriangularViewType& operator=(const TriangularViewImpl& other)
|
||||
{ return *this = other.derived().nestedExpression(); }
|
||||
|
||||
/** \deprecated */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void lazyAssign(const TriangularBase<OtherDerived>& other);
|
||||
|
||||
/** \deprecated */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void lazyAssign(const MatrixBase<OtherDerived>& other);
|
||||
#endif
|
||||
|
||||
/** Efficient triangular matrix times vector/matrix product */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Product<TriangularViewType,OtherDerived>
|
||||
operator*(const MatrixBase<OtherDerived>& rhs) const
|
||||
{
|
||||
return Product<TriangularViewType,OtherDerived>(derived(), rhs.derived());
|
||||
}
|
||||
|
||||
/** Efficient vector/matrix times triangular matrix product */
|
||||
template<typename OtherDerived> friend
|
||||
EIGEN_DEVICE_FUNC
|
||||
const Product<OtherDerived,TriangularViewType>
|
||||
operator*(const MatrixBase<OtherDerived>& lhs, const TriangularViewImpl& rhs)
|
||||
{
|
||||
return Product<OtherDerived,TriangularViewType>(lhs.derived(),rhs.derived());
|
||||
}
|
||||
|
||||
/** \returns the product of the inverse of \c *this with \a other, \a *this being triangular.
|
||||
*
|
||||
* This function computes the inverse-matrix matrix product inverse(\c *this) * \a other if
|
||||
* \a Side==OnTheLeft (the default), or the right-inverse-multiply \a other * inverse(\c *this) if
|
||||
* \a Side==OnTheRight.
|
||||
*
|
||||
* Note that the template parameter \c Side can be ommitted, in which case \c Side==OnTheLeft
|
||||
*
|
||||
* The matrix \c *this must be triangular and invertible (i.e., all the coefficients of the
|
||||
* diagonal must be non zero). It works as a forward (resp. backward) substitution if \c *this
|
||||
* is an upper (resp. lower) triangular matrix.
|
||||
*
|
||||
* Example: \include Triangular_solve.cpp
|
||||
* Output: \verbinclude Triangular_solve.out
|
||||
*
|
||||
* This function returns an expression of the inverse-multiply and can works in-place if it is assigned
|
||||
* to the same matrix or vector \a other.
|
||||
*
|
||||
* For users coming from BLAS, this function (and more specifically solveInPlace()) offer
|
||||
* all the operations supported by the \c *TRSV and \c *TRSM BLAS routines.
|
||||
*
|
||||
* \sa TriangularView::solveInPlace()
|
||||
*/
|
||||
template<int Side, typename Other>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const internal::triangular_solve_retval<Side,TriangularViewType, Other>
|
||||
solve(const MatrixBase<Other>& other) const;
|
||||
|
||||
/** "in-place" version of TriangularView::solve() where the result is written in \a other
|
||||
*
|
||||
* \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here.
|
||||
* This function will const_cast it, so constness isn't honored here.
|
||||
*
|
||||
* Note that the template parameter \c Side can be ommitted, in which case \c Side==OnTheLeft
|
||||
*
|
||||
* See TriangularView:solve() for the details.
|
||||
*/
|
||||
template<int Side, typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void solveInPlace(const MatrixBase<OtherDerived>& other) const;
|
||||
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void solveInPlace(const MatrixBase<OtherDerived>& other) const
|
||||
{ return solveInPlace<OnTheLeft>(other); }
|
||||
|
||||
/** Swaps the coefficients of the common triangular parts of two matrices */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
#ifdef EIGEN_PARSED_BY_DOXYGEN
|
||||
void swap(TriangularBase<OtherDerived> &other)
|
||||
#else
|
||||
void swap(TriangularBase<OtherDerived> const & other)
|
||||
#endif
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(OtherDerived);
|
||||
call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op<Scalar>());
|
||||
}
|
||||
|
||||
/** \deprecated
|
||||
* Shortcut for \code (*this).swap(other.triangularView<(*this)::Mode>()) \endcode */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void swap(MatrixBase<OtherDerived> const & other)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_LVALUE(OtherDerived);
|
||||
call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op<Scalar>());
|
||||
}
|
||||
|
||||
template<typename RhsType, typename DstType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE void _solve_impl(const RhsType &rhs, DstType &dst) const {
|
||||
if(!internal::is_same_dense(dst,rhs))
|
||||
dst = rhs;
|
||||
this->solveInPlace(dst);
|
||||
}
|
||||
|
||||
template<typename ProductType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE TriangularViewType& _assignProduct(const ProductType& prod, const Scalar& alpha, bool beta);
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
* Implementation of triangular evaluation/assignment
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef EIGEN_PARSED_BY_DOXYGEN
|
||||
// FIXME should we keep that possibility
|
||||
template<typename MatrixType, unsigned int Mode>
|
||||
template<typename OtherDerived>
|
||||
inline TriangularView<MatrixType, Mode>&
|
||||
TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const MatrixBase<OtherDerived>& other)
|
||||
{
|
||||
internal::call_assignment_no_alias(derived(), other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());
|
||||
return derived();
|
||||
}
|
||||
|
||||
// FIXME should we keep that possibility
|
||||
template<typename MatrixType, unsigned int Mode>
|
||||
template<typename OtherDerived>
|
||||
void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const MatrixBase<OtherDerived>& other)
|
||||
{
|
||||
internal::call_assignment_no_alias(derived(), other.template triangularView<Mode>());
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename MatrixType, unsigned int Mode>
|
||||
template<typename OtherDerived>
|
||||
inline TriangularView<MatrixType, Mode>&
|
||||
TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const TriangularBase<OtherDerived>& other)
|
||||
{
|
||||
eigen_assert(Mode == int(OtherDerived::Mode));
|
||||
internal::call_assignment(derived(), other.derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
template<typename MatrixType, unsigned int Mode>
|
||||
template<typename OtherDerived>
|
||||
void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const TriangularBase<OtherDerived>& other)
|
||||
{
|
||||
eigen_assert(Mode == int(OtherDerived::Mode));
|
||||
internal::call_assignment_no_alias(derived(), other.derived());
|
||||
}
|
||||
#endif
|
||||
|
||||
/***************************************************************************
|
||||
* Implementation of TriangularBase methods
|
||||
***************************************************************************/
|
||||
|
||||
/** Assigns a triangular or selfadjoint matrix to a dense matrix.
|
||||
* If the matrix is triangular, the opposite part is set to zero. */
|
||||
template<typename Derived>
|
||||
template<typename DenseDerived>
|
||||
void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const
|
||||
{
|
||||
evalToLazy(other.derived());
|
||||
}
|
||||
|
||||
/***************************************************************************
|
||||
* Implementation of TriangularView methods
|
||||
***************************************************************************/
|
||||
|
||||
/***************************************************************************
|
||||
* Implementation of MatrixBase methods
|
||||
***************************************************************************/
|
||||
|
||||
/**
|
||||
* \returns an expression of a triangular view extracted from the current matrix
|
||||
*
|
||||
* The parameter \a Mode can have the following values: \c #Upper, \c #StrictlyUpper, \c #UnitUpper,
|
||||
* \c #Lower, \c #StrictlyLower, \c #UnitLower.
|
||||
*
|
||||
* Example: \include MatrixBase_triangularView.cpp
|
||||
* Output: \verbinclude MatrixBase_triangularView.out
|
||||
*
|
||||
* \sa class TriangularView
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<unsigned int Mode>
|
||||
typename MatrixBase<Derived>::template TriangularViewReturnType<Mode>::Type
|
||||
MatrixBase<Derived>::triangularView()
|
||||
{
|
||||
return typename TriangularViewReturnType<Mode>::Type(derived());
|
||||
}
|
||||
|
||||
/** This is the const version of MatrixBase::triangularView() */
|
||||
template<typename Derived>
|
||||
template<unsigned int Mode>
|
||||
typename MatrixBase<Derived>::template ConstTriangularViewReturnType<Mode>::Type
|
||||
MatrixBase<Derived>::triangularView() const
|
||||
{
|
||||
return typename ConstTriangularViewReturnType<Mode>::Type(derived());
|
||||
}
|
||||
|
||||
/** \returns true if *this is approximately equal to an upper triangular matrix,
|
||||
* within the precision given by \a prec.
|
||||
*
|
||||
* \sa isLowerTriangular()
|
||||
*/
|
||||
template<typename Derived>
|
||||
bool MatrixBase<Derived>::isUpperTriangular(const RealScalar& prec) const
|
||||
{
|
||||
RealScalar maxAbsOnUpperPart = static_cast<RealScalar>(-1);
|
||||
for(Index j = 0; j < cols(); ++j)
|
||||
{
|
||||
Index maxi = numext::mini(j, rows()-1);
|
||||
for(Index i = 0; i <= maxi; ++i)
|
||||
{
|
||||
RealScalar absValue = numext::abs(coeff(i,j));
|
||||
if(absValue > maxAbsOnUpperPart) maxAbsOnUpperPart = absValue;
|
||||
}
|
||||
}
|
||||
RealScalar threshold = maxAbsOnUpperPart * prec;
|
||||
for(Index j = 0; j < cols(); ++j)
|
||||
for(Index i = j+1; i < rows(); ++i)
|
||||
if(numext::abs(coeff(i, j)) > threshold) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/** \returns true if *this is approximately equal to a lower triangular matrix,
|
||||
* within the precision given by \a prec.
|
||||
*
|
||||
* \sa isUpperTriangular()
|
||||
*/
|
||||
template<typename Derived>
|
||||
bool MatrixBase<Derived>::isLowerTriangular(const RealScalar& prec) const
|
||||
{
|
||||
RealScalar maxAbsOnLowerPart = static_cast<RealScalar>(-1);
|
||||
for(Index j = 0; j < cols(); ++j)
|
||||
for(Index i = j; i < rows(); ++i)
|
||||
{
|
||||
RealScalar absValue = numext::abs(coeff(i,j));
|
||||
if(absValue > maxAbsOnLowerPart) maxAbsOnLowerPart = absValue;
|
||||
}
|
||||
RealScalar threshold = maxAbsOnLowerPart * prec;
|
||||
for(Index j = 1; j < cols(); ++j)
|
||||
{
|
||||
Index maxi = numext::mini(j, rows()-1);
|
||||
for(Index i = 0; i < maxi; ++i)
|
||||
if(numext::abs(coeff(i, j)) > threshold) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
****************************************************************************
|
||||
* Evaluators and Assignment of triangular expressions
|
||||
***************************************************************************
|
||||
***************************************************************************/
|
||||
|
||||
namespace internal {
|
||||
|
||||
|
||||
// TODO currently a triangular expression has the form TriangularView<.,.>
|
||||
// in the future triangular-ness should be defined by the expression traits
|
||||
// such that Transpose<TriangularView<.,.> > is valid. (currently TriangularBase::transpose() is overloaded to make it work)
|
||||
template<typename MatrixType, unsigned int Mode>
|
||||
struct evaluator_traits<TriangularView<MatrixType,Mode> >
|
||||
{
|
||||
typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
|
||||
typedef typename glue_shapes<typename evaluator_traits<MatrixType>::Shape, TriangularShape>::type Shape;
|
||||
};
|
||||
|
||||
template<typename MatrixType, unsigned int Mode>
|
||||
struct unary_evaluator<TriangularView<MatrixType,Mode>, IndexBased>
|
||||
: evaluator<typename internal::remove_all<MatrixType>::type>
|
||||
{
|
||||
typedef TriangularView<MatrixType,Mode> XprType;
|
||||
typedef evaluator<typename internal::remove_all<MatrixType>::type> Base;
|
||||
unary_evaluator(const XprType &xpr) : Base(xpr.nestedExpression()) {}
|
||||
};
|
||||
|
||||
// Additional assignment kinds:
|
||||
struct Triangular2Triangular {};
|
||||
struct Triangular2Dense {};
|
||||
struct Dense2Triangular {};
|
||||
|
||||
|
||||
template<typename Kernel, unsigned int Mode, int UnrollCount, bool ClearOpposite> struct triangular_assignment_loop;
|
||||
|
||||
|
||||
/** \internal Specialization of the dense assignment kernel for triangular matrices.
|
||||
* The main difference is that the triangular, diagonal, and opposite parts are processed through three different functions.
|
||||
* \tparam UpLo must be either Lower or Upper
|
||||
* \tparam Mode must be either 0, UnitDiag, ZeroDiag, or SelfAdjoint
|
||||
*/
|
||||
template<int UpLo, int Mode, int SetOpposite, typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
|
||||
class triangular_dense_assignment_kernel : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version>
|
||||
{
|
||||
protected:
|
||||
typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version> Base;
|
||||
typedef typename Base::DstXprType DstXprType;
|
||||
typedef typename Base::SrcXprType SrcXprType;
|
||||
using Base::m_dst;
|
||||
using Base::m_src;
|
||||
using Base::m_functor;
|
||||
public:
|
||||
|
||||
typedef typename Base::DstEvaluatorType DstEvaluatorType;
|
||||
typedef typename Base::SrcEvaluatorType SrcEvaluatorType;
|
||||
typedef typename Base::Scalar Scalar;
|
||||
typedef typename Base::AssignmentTraits AssignmentTraits;
|
||||
|
||||
|
||||
EIGEN_DEVICE_FUNC triangular_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
|
||||
: Base(dst, src, func, dstExpr)
|
||||
{}
|
||||
|
||||
#ifdef EIGEN_INTERNAL_DEBUGGING
|
||||
EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col)
|
||||
{
|
||||
eigen_internal_assert(row!=col);
|
||||
Base::assignCoeff(row,col);
|
||||
}
|
||||
#else
|
||||
using Base::assignCoeff;
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC void assignDiagonalCoeff(Index id)
|
||||
{
|
||||
if(Mode==UnitDiag && SetOpposite) m_functor.assignCoeff(m_dst.coeffRef(id,id), Scalar(1));
|
||||
else if(Mode==ZeroDiag && SetOpposite) m_functor.assignCoeff(m_dst.coeffRef(id,id), Scalar(0));
|
||||
else if(Mode==0) Base::assignCoeff(id,id);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC void assignOppositeCoeff(Index row, Index col)
|
||||
{
|
||||
eigen_internal_assert(row!=col);
|
||||
if(SetOpposite)
|
||||
m_functor.assignCoeff(m_dst.coeffRef(row,col), Scalar(0));
|
||||
}
|
||||
};
|
||||
|
||||
template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType, typename Functor>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void call_triangular_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
|
||||
{
|
||||
typedef evaluator<DstXprType> DstEvaluatorType;
|
||||
typedef evaluator<SrcXprType> SrcEvaluatorType;
|
||||
|
||||
SrcEvaluatorType srcEvaluator(src);
|
||||
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
DstEvaluatorType dstEvaluator(dst);
|
||||
|
||||
typedef triangular_dense_assignment_kernel< Mode&(Lower|Upper),Mode&(UnitDiag|ZeroDiag|SelfAdjoint),SetOpposite,
|
||||
DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
|
||||
Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
|
||||
|
||||
enum {
|
||||
unroll = DstXprType::SizeAtCompileTime != Dynamic
|
||||
&& SrcEvaluatorType::CoeffReadCost < HugeCost
|
||||
&& DstXprType::SizeAtCompileTime * (DstEvaluatorType::CoeffReadCost+SrcEvaluatorType::CoeffReadCost) / 2 <= EIGEN_UNROLLING_LIMIT
|
||||
};
|
||||
|
||||
triangular_assignment_loop<Kernel, Mode, unroll ? int(DstXprType::SizeAtCompileTime) : Dynamic, SetOpposite>::run(kernel);
|
||||
}
|
||||
|
||||
template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void call_triangular_assignment_loop(DstXprType& dst, const SrcXprType& src)
|
||||
{
|
||||
call_triangular_assignment_loop<Mode,SetOpposite>(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
|
||||
}
|
||||
|
||||
template<> struct AssignmentKind<TriangularShape,TriangularShape> { typedef Triangular2Triangular Kind; };
|
||||
template<> struct AssignmentKind<DenseShape,TriangularShape> { typedef Triangular2Dense Kind; };
|
||||
template<> struct AssignmentKind<TriangularShape,DenseShape> { typedef Dense2Triangular Kind; };
|
||||
|
||||
|
||||
template< typename DstXprType, typename SrcXprType, typename Functor>
|
||||
struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Triangular>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
|
||||
{
|
||||
eigen_assert(int(DstXprType::Mode) == int(SrcXprType::Mode));
|
||||
|
||||
call_triangular_assignment_loop<DstXprType::Mode, false>(dst, src, func);
|
||||
}
|
||||
};
|
||||
|
||||
template< typename DstXprType, typename SrcXprType, typename Functor>
|
||||
struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Dense>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
|
||||
{
|
||||
call_triangular_assignment_loop<SrcXprType::Mode, (SrcXprType::Mode&SelfAdjoint)==0>(dst, src, func);
|
||||
}
|
||||
};
|
||||
|
||||
template< typename DstXprType, typename SrcXprType, typename Functor>
|
||||
struct Assignment<DstXprType, SrcXprType, Functor, Dense2Triangular>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
|
||||
{
|
||||
call_triangular_assignment_loop<DstXprType::Mode, false>(dst, src, func);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<typename Kernel, unsigned int Mode, int UnrollCount, bool SetOpposite>
|
||||
struct triangular_assignment_loop
|
||||
{
|
||||
// FIXME: this is not very clean, perhaps this information should be provided by the kernel?
|
||||
typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
|
||||
typedef typename DstEvaluatorType::XprType DstXprType;
|
||||
|
||||
enum {
|
||||
col = (UnrollCount-1) / DstXprType::RowsAtCompileTime,
|
||||
row = (UnrollCount-1) % DstXprType::RowsAtCompileTime
|
||||
};
|
||||
|
||||
typedef typename Kernel::Scalar Scalar;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Kernel &kernel)
|
||||
{
|
||||
triangular_assignment_loop<Kernel, Mode, UnrollCount-1, SetOpposite>::run(kernel);
|
||||
|
||||
if(row==col)
|
||||
kernel.assignDiagonalCoeff(row);
|
||||
else if( ((Mode&Lower) && row>col) || ((Mode&Upper) && row<col) )
|
||||
kernel.assignCoeff(row,col);
|
||||
else if(SetOpposite)
|
||||
kernel.assignOppositeCoeff(row,col);
|
||||
}
|
||||
};
|
||||
|
||||
// prevent buggy user code from causing an infinite recursion
|
||||
template<typename Kernel, unsigned int Mode, bool SetOpposite>
|
||||
struct triangular_assignment_loop<Kernel, Mode, 0, SetOpposite>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Kernel &) {}
|
||||
};
|
||||
|
||||
|
||||
|
||||
// TODO: experiment with a recursive assignment procedure splitting the current
|
||||
// triangular part into one rectangular and two triangular parts.
|
||||
|
||||
|
||||
template<typename Kernel, unsigned int Mode, bool SetOpposite>
|
||||
struct triangular_assignment_loop<Kernel, Mode, Dynamic, SetOpposite>
|
||||
{
|
||||
typedef typename Kernel::Scalar Scalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(Kernel &kernel)
|
||||
{
|
||||
for(Index j = 0; j < kernel.cols(); ++j)
|
||||
{
|
||||
Index maxi = numext::mini(j, kernel.rows());
|
||||
Index i = 0;
|
||||
if (((Mode&Lower) && SetOpposite) || (Mode&Upper))
|
||||
{
|
||||
for(; i < maxi; ++i)
|
||||
if(Mode&Upper) kernel.assignCoeff(i, j);
|
||||
else kernel.assignOppositeCoeff(i, j);
|
||||
}
|
||||
else
|
||||
i = maxi;
|
||||
|
||||
if(i<kernel.rows()) // then i==j
|
||||
kernel.assignDiagonalCoeff(i++);
|
||||
|
||||
if (((Mode&Upper) && SetOpposite) || (Mode&Lower))
|
||||
{
|
||||
for(; i < kernel.rows(); ++i)
|
||||
if(Mode&Lower) kernel.assignCoeff(i, j);
|
||||
else kernel.assignOppositeCoeff(i, j);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** Assigns a triangular or selfadjoint matrix to a dense matrix.
|
||||
* If the matrix is triangular, the opposite part is set to zero. */
|
||||
template<typename Derived>
|
||||
template<typename DenseDerived>
|
||||
void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const
|
||||
{
|
||||
other.derived().resize(this->rows(), this->cols());
|
||||
internal::call_triangular_assignment_loop<Derived::Mode,(Derived::Mode&SelfAdjoint)==0 /* SetOpposite */>(other.derived(), derived().nestedExpression());
|
||||
}
|
||||
|
||||
namespace internal {
|
||||
|
||||
// Triangular = Product
|
||||
template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
|
||||
struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_op<Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, Dense2Triangular>
|
||||
{
|
||||
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename SrcXprType::Scalar> &)
|
||||
{
|
||||
Index dstRows = src.rows();
|
||||
Index dstCols = src.cols();
|
||||
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
|
||||
dst.resize(dstRows, dstCols);
|
||||
|
||||
dst._assignProduct(src, 1, 0);
|
||||
}
|
||||
};
|
||||
|
||||
// Triangular += Product
|
||||
template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
|
||||
struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::add_assign_op<Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, Dense2Triangular>
|
||||
{
|
||||
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,typename SrcXprType::Scalar> &)
|
||||
{
|
||||
dst._assignProduct(src, 1, 1);
|
||||
}
|
||||
};
|
||||
|
||||
// Triangular -= Product
|
||||
template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
|
||||
struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::sub_assign_op<Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, Dense2Triangular>
|
||||
{
|
||||
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,typename SrcXprType::Scalar> &)
|
||||
{
|
||||
dst._assignProduct(src, -1, 1);
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_TRIANGULARMATRIX_H
|
||||
96
external/include/eigen3/Eigen/src/Core/VectorBlock.h
vendored
Normal file
96
external/include/eigen3/Eigen/src/Core/VectorBlock.h
vendored
Normal file
@@ -0,0 +1,96 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_VECTORBLOCK_H
|
||||
#define EIGEN_VECTORBLOCK_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
template<typename VectorType, int Size>
|
||||
struct traits<VectorBlock<VectorType, Size> >
|
||||
: public traits<Block<VectorType,
|
||||
traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
|
||||
traits<VectorType>::Flags & RowMajorBit ? Size : 1> >
|
||||
{
|
||||
};
|
||||
}
|
||||
|
||||
/** \class VectorBlock
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Expression of a fixed-size or dynamic-size sub-vector
|
||||
*
|
||||
* \tparam VectorType the type of the object in which we are taking a sub-vector
|
||||
* \tparam Size size of the sub-vector we are taking at compile time (optional)
|
||||
*
|
||||
* This class represents an expression of either a fixed-size or dynamic-size sub-vector.
|
||||
* It is the return type of DenseBase::segment(Index,Index) and DenseBase::segment<int>(Index) and
|
||||
* most of the time this is the only way it is used.
|
||||
*
|
||||
* However, if you want to directly maniputate sub-vector expressions,
|
||||
* for instance if you want to write a function returning such an expression, you
|
||||
* will need to use this class.
|
||||
*
|
||||
* Here is an example illustrating the dynamic case:
|
||||
* \include class_VectorBlock.cpp
|
||||
* Output: \verbinclude class_VectorBlock.out
|
||||
*
|
||||
* \note Even though this expression has dynamic size, in the case where \a VectorType
|
||||
* has fixed size, this expression inherits a fixed maximal size which means that evaluating
|
||||
* it does not cause a dynamic memory allocation.
|
||||
*
|
||||
* Here is an example illustrating the fixed-size case:
|
||||
* \include class_FixedVectorBlock.cpp
|
||||
* Output: \verbinclude class_FixedVectorBlock.out
|
||||
*
|
||||
* \sa class Block, DenseBase::segment(Index,Index,Index,Index), DenseBase::segment(Index,Index)
|
||||
*/
|
||||
template<typename VectorType, int Size> class VectorBlock
|
||||
: public Block<VectorType,
|
||||
internal::traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
|
||||
internal::traits<VectorType>::Flags & RowMajorBit ? Size : 1>
|
||||
{
|
||||
typedef Block<VectorType,
|
||||
internal::traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
|
||||
internal::traits<VectorType>::Flags & RowMajorBit ? Size : 1> Base;
|
||||
enum {
|
||||
IsColVector = !(internal::traits<VectorType>::Flags & RowMajorBit)
|
||||
};
|
||||
public:
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(VectorBlock)
|
||||
|
||||
using Base::operator=;
|
||||
|
||||
/** Dynamic-size constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline VectorBlock(VectorType& vector, Index start, Index size)
|
||||
: Base(vector,
|
||||
IsColVector ? start : 0, IsColVector ? 0 : start,
|
||||
IsColVector ? size : 1, IsColVector ? 1 : size)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorBlock);
|
||||
}
|
||||
|
||||
/** Fixed-size constructor
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline VectorBlock(VectorType& vector, Index start)
|
||||
: Base(vector, IsColVector ? start : 0, IsColVector ? 0 : start)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorBlock);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_VECTORBLOCK_H
|
||||
695
external/include/eigen3/Eigen/src/Core/VectorwiseOp.h
vendored
Normal file
695
external/include/eigen3/Eigen/src/Core/VectorwiseOp.h
vendored
Normal file
@@ -0,0 +1,695 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_PARTIAL_REDUX_H
|
||||
#define EIGEN_PARTIAL_REDUX_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class PartialReduxExpr
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Generic expression of a partially reduxed matrix
|
||||
*
|
||||
* \tparam MatrixType the type of the matrix we are applying the redux operation
|
||||
* \tparam MemberOp type of the member functor
|
||||
* \tparam Direction indicates the direction of the redux (#Vertical or #Horizontal)
|
||||
*
|
||||
* This class represents an expression of a partial redux operator of a matrix.
|
||||
* It is the return type of some VectorwiseOp functions,
|
||||
* and most of the time this is the only way it is used.
|
||||
*
|
||||
* \sa class VectorwiseOp
|
||||
*/
|
||||
|
||||
template< typename MatrixType, typename MemberOp, int Direction>
|
||||
class PartialReduxExpr;
|
||||
|
||||
namespace internal {
|
||||
template<typename MatrixType, typename MemberOp, int Direction>
|
||||
struct traits<PartialReduxExpr<MatrixType, MemberOp, Direction> >
|
||||
: traits<MatrixType>
|
||||
{
|
||||
typedef typename MemberOp::result_type Scalar;
|
||||
typedef typename traits<MatrixType>::StorageKind StorageKind;
|
||||
typedef typename traits<MatrixType>::XprKind XprKind;
|
||||
typedef typename MatrixType::Scalar InputScalar;
|
||||
enum {
|
||||
RowsAtCompileTime = Direction==Vertical ? 1 : MatrixType::RowsAtCompileTime,
|
||||
ColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::ColsAtCompileTime,
|
||||
MaxRowsAtCompileTime = Direction==Vertical ? 1 : MatrixType::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::MaxColsAtCompileTime,
|
||||
Flags = RowsAtCompileTime == 1 ? RowMajorBit : 0,
|
||||
TraversalSize = Direction==Vertical ? MatrixType::RowsAtCompileTime : MatrixType::ColsAtCompileTime
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
template< typename MatrixType, typename MemberOp, int Direction>
|
||||
class PartialReduxExpr : public internal::dense_xpr_base< PartialReduxExpr<MatrixType, MemberOp, Direction> >::type,
|
||||
internal::no_assignment_operator
|
||||
{
|
||||
public:
|
||||
|
||||
typedef typename internal::dense_xpr_base<PartialReduxExpr>::type Base;
|
||||
EIGEN_DENSE_PUBLIC_INTERFACE(PartialReduxExpr)
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit PartialReduxExpr(const MatrixType& mat, const MemberOp& func = MemberOp())
|
||||
: m_matrix(mat), m_functor(func) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
Index rows() const { return (Direction==Vertical ? 1 : m_matrix.rows()); }
|
||||
EIGEN_DEVICE_FUNC
|
||||
Index cols() const { return (Direction==Horizontal ? 1 : m_matrix.cols()); }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename MatrixType::Nested nestedExpression() const { return m_matrix; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const MemberOp& functor() const { return m_functor; }
|
||||
|
||||
protected:
|
||||
typename MatrixType::Nested m_matrix;
|
||||
const MemberOp m_functor;
|
||||
};
|
||||
|
||||
#define EIGEN_MEMBER_FUNCTOR(MEMBER,COST) \
|
||||
template <typename ResultType> \
|
||||
struct member_##MEMBER { \
|
||||
EIGEN_EMPTY_STRUCT_CTOR(member_##MEMBER) \
|
||||
typedef ResultType result_type; \
|
||||
template<typename Scalar, int Size> struct Cost \
|
||||
{ enum { value = COST }; }; \
|
||||
template<typename XprType> \
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
|
||||
ResultType operator()(const XprType& mat) const \
|
||||
{ return mat.MEMBER(); } \
|
||||
}
|
||||
|
||||
namespace internal {
|
||||
|
||||
EIGEN_MEMBER_FUNCTOR(squaredNorm, Size * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
|
||||
EIGEN_MEMBER_FUNCTOR(norm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
|
||||
EIGEN_MEMBER_FUNCTOR(stableNorm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
|
||||
EIGEN_MEMBER_FUNCTOR(blueNorm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
|
||||
EIGEN_MEMBER_FUNCTOR(hypotNorm, (Size-1) * functor_traits<scalar_hypot_op<Scalar> >::Cost );
|
||||
EIGEN_MEMBER_FUNCTOR(sum, (Size-1)*NumTraits<Scalar>::AddCost);
|
||||
EIGEN_MEMBER_FUNCTOR(mean, (Size-1)*NumTraits<Scalar>::AddCost + NumTraits<Scalar>::MulCost);
|
||||
EIGEN_MEMBER_FUNCTOR(minCoeff, (Size-1)*NumTraits<Scalar>::AddCost);
|
||||
EIGEN_MEMBER_FUNCTOR(maxCoeff, (Size-1)*NumTraits<Scalar>::AddCost);
|
||||
EIGEN_MEMBER_FUNCTOR(all, (Size-1)*NumTraits<Scalar>::AddCost);
|
||||
EIGEN_MEMBER_FUNCTOR(any, (Size-1)*NumTraits<Scalar>::AddCost);
|
||||
EIGEN_MEMBER_FUNCTOR(count, (Size-1)*NumTraits<Scalar>::AddCost);
|
||||
EIGEN_MEMBER_FUNCTOR(prod, (Size-1)*NumTraits<Scalar>::MulCost);
|
||||
|
||||
template <int p, typename ResultType>
|
||||
struct member_lpnorm {
|
||||
typedef ResultType result_type;
|
||||
template<typename Scalar, int Size> struct Cost
|
||||
{ enum { value = (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost }; };
|
||||
EIGEN_DEVICE_FUNC member_lpnorm() {}
|
||||
template<typename XprType>
|
||||
EIGEN_DEVICE_FUNC inline ResultType operator()(const XprType& mat) const
|
||||
{ return mat.template lpNorm<p>(); }
|
||||
};
|
||||
|
||||
template <typename BinaryOp, typename Scalar>
|
||||
struct member_redux {
|
||||
typedef typename result_of<
|
||||
BinaryOp(const Scalar&,const Scalar&)
|
||||
>::type result_type;
|
||||
template<typename _Scalar, int Size> struct Cost
|
||||
{ enum { value = (Size-1) * functor_traits<BinaryOp>::Cost }; };
|
||||
EIGEN_DEVICE_FUNC explicit member_redux(const BinaryOp func) : m_functor(func) {}
|
||||
template<typename Derived>
|
||||
EIGEN_DEVICE_FUNC inline result_type operator()(const DenseBase<Derived>& mat) const
|
||||
{ return mat.redux(m_functor); }
|
||||
const BinaryOp m_functor;
|
||||
};
|
||||
}
|
||||
|
||||
/** \class VectorwiseOp
|
||||
* \ingroup Core_Module
|
||||
*
|
||||
* \brief Pseudo expression providing partial reduction operations
|
||||
*
|
||||
* \tparam ExpressionType the type of the object on which to do partial reductions
|
||||
* \tparam Direction indicates the direction of the redux (#Vertical or #Horizontal)
|
||||
*
|
||||
* This class represents a pseudo expression with partial reduction features.
|
||||
* It is the return type of DenseBase::colwise() and DenseBase::rowwise()
|
||||
* and most of the time this is the only way it is used.
|
||||
*
|
||||
* Example: \include MatrixBase_colwise.cpp
|
||||
* Output: \verbinclude MatrixBase_colwise.out
|
||||
*
|
||||
* \sa DenseBase::colwise(), DenseBase::rowwise(), class PartialReduxExpr
|
||||
*/
|
||||
template<typename ExpressionType, int Direction> class VectorwiseOp
|
||||
{
|
||||
public:
|
||||
|
||||
typedef typename ExpressionType::Scalar Scalar;
|
||||
typedef typename ExpressionType::RealScalar RealScalar;
|
||||
typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
|
||||
typedef typename internal::ref_selector<ExpressionType>::non_const_type ExpressionTypeNested;
|
||||
typedef typename internal::remove_all<ExpressionTypeNested>::type ExpressionTypeNestedCleaned;
|
||||
|
||||
template<template<typename _Scalar> class Functor,
|
||||
typename Scalar_=Scalar> struct ReturnType
|
||||
{
|
||||
typedef PartialReduxExpr<ExpressionType,
|
||||
Functor<Scalar_>,
|
||||
Direction
|
||||
> Type;
|
||||
};
|
||||
|
||||
template<typename BinaryOp> struct ReduxReturnType
|
||||
{
|
||||
typedef PartialReduxExpr<ExpressionType,
|
||||
internal::member_redux<BinaryOp,Scalar>,
|
||||
Direction
|
||||
> Type;
|
||||
};
|
||||
|
||||
enum {
|
||||
isVertical = (Direction==Vertical) ? 1 : 0,
|
||||
isHorizontal = (Direction==Horizontal) ? 1 : 0
|
||||
};
|
||||
|
||||
protected:
|
||||
|
||||
typedef typename internal::conditional<isVertical,
|
||||
typename ExpressionType::ColXpr,
|
||||
typename ExpressionType::RowXpr>::type SubVector;
|
||||
/** \internal
|
||||
* \returns the i-th subvector according to the \c Direction */
|
||||
EIGEN_DEVICE_FUNC
|
||||
SubVector subVector(Index i)
|
||||
{
|
||||
return SubVector(m_matrix.derived(),i);
|
||||
}
|
||||
|
||||
/** \internal
|
||||
* \returns the number of subvectors in the direction \c Direction */
|
||||
EIGEN_DEVICE_FUNC
|
||||
Index subVectors() const
|
||||
{ return isVertical?m_matrix.cols():m_matrix.rows(); }
|
||||
|
||||
template<typename OtherDerived> struct ExtendedType {
|
||||
typedef Replicate<OtherDerived,
|
||||
isVertical ? 1 : ExpressionType::RowsAtCompileTime,
|
||||
isHorizontal ? 1 : ExpressionType::ColsAtCompileTime> Type;
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* Replicates a vector to match the size of \c *this */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename ExtendedType<OtherDerived>::Type
|
||||
extendedTo(const DenseBase<OtherDerived>& other) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isVertical, OtherDerived::MaxColsAtCompileTime==1),
|
||||
YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)
|
||||
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isHorizontal, OtherDerived::MaxRowsAtCompileTime==1),
|
||||
YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)
|
||||
return typename ExtendedType<OtherDerived>::Type
|
||||
(other.derived(),
|
||||
isVertical ? 1 : m_matrix.rows(),
|
||||
isHorizontal ? 1 : m_matrix.cols());
|
||||
}
|
||||
|
||||
template<typename OtherDerived> struct OppositeExtendedType {
|
||||
typedef Replicate<OtherDerived,
|
||||
isHorizontal ? 1 : ExpressionType::RowsAtCompileTime,
|
||||
isVertical ? 1 : ExpressionType::ColsAtCompileTime> Type;
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* Replicates a vector in the opposite direction to match the size of \c *this */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename OppositeExtendedType<OtherDerived>::Type
|
||||
extendedToOpposite(const DenseBase<OtherDerived>& other) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isHorizontal, OtherDerived::MaxColsAtCompileTime==1),
|
||||
YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)
|
||||
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isVertical, OtherDerived::MaxRowsAtCompileTime==1),
|
||||
YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)
|
||||
return typename OppositeExtendedType<OtherDerived>::Type
|
||||
(other.derived(),
|
||||
isHorizontal ? 1 : m_matrix.rows(),
|
||||
isVertical ? 1 : m_matrix.cols());
|
||||
}
|
||||
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit inline VectorwiseOp(ExpressionType& matrix) : m_matrix(matrix) {}
|
||||
|
||||
/** \internal */
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const ExpressionType& _expression() const { return m_matrix; }
|
||||
|
||||
/** \returns a row or column vector expression of \c *this reduxed by \a func
|
||||
*
|
||||
* The template parameter \a BinaryOp is the type of the functor
|
||||
* of the custom redux operator. Note that func must be an associative operator.
|
||||
*
|
||||
* \sa class VectorwiseOp, DenseBase::colwise(), DenseBase::rowwise()
|
||||
*/
|
||||
template<typename BinaryOp>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename ReduxReturnType<BinaryOp>::Type
|
||||
redux(const BinaryOp& func = BinaryOp()) const
|
||||
{ return typename ReduxReturnType<BinaryOp>::Type(_expression(), internal::member_redux<BinaryOp,Scalar>(func)); }
|
||||
|
||||
typedef typename ReturnType<internal::member_minCoeff>::Type MinCoeffReturnType;
|
||||
typedef typename ReturnType<internal::member_maxCoeff>::Type MaxCoeffReturnType;
|
||||
typedef typename ReturnType<internal::member_squaredNorm,RealScalar>::Type SquaredNormReturnType;
|
||||
typedef typename ReturnType<internal::member_norm,RealScalar>::Type NormReturnType;
|
||||
typedef typename ReturnType<internal::member_blueNorm,RealScalar>::Type BlueNormReturnType;
|
||||
typedef typename ReturnType<internal::member_stableNorm,RealScalar>::Type StableNormReturnType;
|
||||
typedef typename ReturnType<internal::member_hypotNorm,RealScalar>::Type HypotNormReturnType;
|
||||
typedef typename ReturnType<internal::member_sum>::Type SumReturnType;
|
||||
typedef typename ReturnType<internal::member_mean>::Type MeanReturnType;
|
||||
typedef typename ReturnType<internal::member_all>::Type AllReturnType;
|
||||
typedef typename ReturnType<internal::member_any>::Type AnyReturnType;
|
||||
typedef PartialReduxExpr<ExpressionType, internal::member_count<Index>, Direction> CountReturnType;
|
||||
typedef typename ReturnType<internal::member_prod>::Type ProdReturnType;
|
||||
typedef Reverse<const ExpressionType, Direction> ConstReverseReturnType;
|
||||
typedef Reverse<ExpressionType, Direction> ReverseReturnType;
|
||||
|
||||
template<int p> struct LpNormReturnType {
|
||||
typedef PartialReduxExpr<ExpressionType, internal::member_lpnorm<p,RealScalar>,Direction> Type;
|
||||
};
|
||||
|
||||
/** \returns a row (or column) vector expression of the smallest coefficient
|
||||
* of each column (or row) of the referenced expression.
|
||||
*
|
||||
* \warning the result is undefined if \c *this contains NaN.
|
||||
*
|
||||
* Example: \include PartialRedux_minCoeff.cpp
|
||||
* Output: \verbinclude PartialRedux_minCoeff.out
|
||||
*
|
||||
* \sa DenseBase::minCoeff() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const MinCoeffReturnType minCoeff() const
|
||||
{ return MinCoeffReturnType(_expression()); }
|
||||
|
||||
/** \returns a row (or column) vector expression of the largest coefficient
|
||||
* of each column (or row) of the referenced expression.
|
||||
*
|
||||
* \warning the result is undefined if \c *this contains NaN.
|
||||
*
|
||||
* Example: \include PartialRedux_maxCoeff.cpp
|
||||
* Output: \verbinclude PartialRedux_maxCoeff.out
|
||||
*
|
||||
* \sa DenseBase::maxCoeff() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const MaxCoeffReturnType maxCoeff() const
|
||||
{ return MaxCoeffReturnType(_expression()); }
|
||||
|
||||
/** \returns a row (or column) vector expression of the squared norm
|
||||
* of each column (or row) of the referenced expression.
|
||||
* This is a vector with real entries, even if the original matrix has complex entries.
|
||||
*
|
||||
* Example: \include PartialRedux_squaredNorm.cpp
|
||||
* Output: \verbinclude PartialRedux_squaredNorm.out
|
||||
*
|
||||
* \sa DenseBase::squaredNorm() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const SquaredNormReturnType squaredNorm() const
|
||||
{ return SquaredNormReturnType(_expression()); }
|
||||
|
||||
/** \returns a row (or column) vector expression of the norm
|
||||
* of each column (or row) of the referenced expression.
|
||||
* This is a vector with real entries, even if the original matrix has complex entries.
|
||||
*
|
||||
* Example: \include PartialRedux_norm.cpp
|
||||
* Output: \verbinclude PartialRedux_norm.out
|
||||
*
|
||||
* \sa DenseBase::norm() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const NormReturnType norm() const
|
||||
{ return NormReturnType(_expression()); }
|
||||
|
||||
/** \returns a row (or column) vector expression of the norm
|
||||
* of each column (or row) of the referenced expression.
|
||||
* This is a vector with real entries, even if the original matrix has complex entries.
|
||||
*
|
||||
* Example: \include PartialRedux_norm.cpp
|
||||
* Output: \verbinclude PartialRedux_norm.out
|
||||
*
|
||||
* \sa DenseBase::norm() */
|
||||
template<int p>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename LpNormReturnType<p>::Type lpNorm() const
|
||||
{ return typename LpNormReturnType<p>::Type(_expression()); }
|
||||
|
||||
|
||||
/** \returns a row (or column) vector expression of the norm
|
||||
* of each column (or row) of the referenced expression, using
|
||||
* Blue's algorithm.
|
||||
* This is a vector with real entries, even if the original matrix has complex entries.
|
||||
*
|
||||
* \sa DenseBase::blueNorm() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const BlueNormReturnType blueNorm() const
|
||||
{ return BlueNormReturnType(_expression()); }
|
||||
|
||||
|
||||
/** \returns a row (or column) vector expression of the norm
|
||||
* of each column (or row) of the referenced expression, avoiding
|
||||
* underflow and overflow.
|
||||
* This is a vector with real entries, even if the original matrix has complex entries.
|
||||
*
|
||||
* \sa DenseBase::stableNorm() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const StableNormReturnType stableNorm() const
|
||||
{ return StableNormReturnType(_expression()); }
|
||||
|
||||
|
||||
/** \returns a row (or column) vector expression of the norm
|
||||
* of each column (or row) of the referenced expression, avoiding
|
||||
* underflow and overflow using a concatenation of hypot() calls.
|
||||
* This is a vector with real entries, even if the original matrix has complex entries.
|
||||
*
|
||||
* \sa DenseBase::hypotNorm() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const HypotNormReturnType hypotNorm() const
|
||||
{ return HypotNormReturnType(_expression()); }
|
||||
|
||||
/** \returns a row (or column) vector expression of the sum
|
||||
* of each column (or row) of the referenced expression.
|
||||
*
|
||||
* Example: \include PartialRedux_sum.cpp
|
||||
* Output: \verbinclude PartialRedux_sum.out
|
||||
*
|
||||
* \sa DenseBase::sum() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const SumReturnType sum() const
|
||||
{ return SumReturnType(_expression()); }
|
||||
|
||||
/** \returns a row (or column) vector expression of the mean
|
||||
* of each column (or row) of the referenced expression.
|
||||
*
|
||||
* \sa DenseBase::mean() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const MeanReturnType mean() const
|
||||
{ return MeanReturnType(_expression()); }
|
||||
|
||||
/** \returns a row (or column) vector expression representing
|
||||
* whether \b all coefficients of each respective column (or row) are \c true.
|
||||
* This expression can be assigned to a vector with entries of type \c bool.
|
||||
*
|
||||
* \sa DenseBase::all() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const AllReturnType all() const
|
||||
{ return AllReturnType(_expression()); }
|
||||
|
||||
/** \returns a row (or column) vector expression representing
|
||||
* whether \b at \b least one coefficient of each respective column (or row) is \c true.
|
||||
* This expression can be assigned to a vector with entries of type \c bool.
|
||||
*
|
||||
* \sa DenseBase::any() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const AnyReturnType any() const
|
||||
{ return AnyReturnType(_expression()); }
|
||||
|
||||
/** \returns a row (or column) vector expression representing
|
||||
* the number of \c true coefficients of each respective column (or row).
|
||||
* This expression can be assigned to a vector whose entries have the same type as is used to
|
||||
* index entries of the original matrix; for dense matrices, this is \c std::ptrdiff_t .
|
||||
*
|
||||
* Example: \include PartialRedux_count.cpp
|
||||
* Output: \verbinclude PartialRedux_count.out
|
||||
*
|
||||
* \sa DenseBase::count() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const CountReturnType count() const
|
||||
{ return CountReturnType(_expression()); }
|
||||
|
||||
/** \returns a row (or column) vector expression of the product
|
||||
* of each column (or row) of the referenced expression.
|
||||
*
|
||||
* Example: \include PartialRedux_prod.cpp
|
||||
* Output: \verbinclude PartialRedux_prod.out
|
||||
*
|
||||
* \sa DenseBase::prod() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const ProdReturnType prod() const
|
||||
{ return ProdReturnType(_expression()); }
|
||||
|
||||
|
||||
/** \returns a matrix expression
|
||||
* where each column (or row) are reversed.
|
||||
*
|
||||
* Example: \include Vectorwise_reverse.cpp
|
||||
* Output: \verbinclude Vectorwise_reverse.out
|
||||
*
|
||||
* \sa DenseBase::reverse() */
|
||||
EIGEN_DEVICE_FUNC
|
||||
const ConstReverseReturnType reverse() const
|
||||
{ return ConstReverseReturnType( _expression() ); }
|
||||
|
||||
/** \returns a writable matrix expression
|
||||
* where each column (or row) are reversed.
|
||||
*
|
||||
* \sa reverse() const */
|
||||
EIGEN_DEVICE_FUNC
|
||||
ReverseReturnType reverse()
|
||||
{ return ReverseReturnType( _expression() ); }
|
||||
|
||||
typedef Replicate<ExpressionType,(isVertical?Dynamic:1),(isHorizontal?Dynamic:1)> ReplicateReturnType;
|
||||
EIGEN_DEVICE_FUNC
|
||||
const ReplicateReturnType replicate(Index factor) const;
|
||||
|
||||
/**
|
||||
* \return an expression of the replication of each column (or row) of \c *this
|
||||
*
|
||||
* Example: \include DirectionWise_replicate.cpp
|
||||
* Output: \verbinclude DirectionWise_replicate.out
|
||||
*
|
||||
* \sa VectorwiseOp::replicate(Index), DenseBase::replicate(), class Replicate
|
||||
*/
|
||||
// NOTE implemented here because of sunstudio's compilation errors
|
||||
// isVertical*Factor+isHorizontal instead of (isVertical?Factor:1) to handle CUDA bug with ternary operator
|
||||
template<int Factor> const Replicate<ExpressionType,isVertical*Factor+isHorizontal,isHorizontal*Factor+isVertical>
|
||||
EIGEN_DEVICE_FUNC
|
||||
replicate(Index factor = Factor) const
|
||||
{
|
||||
return Replicate<ExpressionType,(isVertical?Factor:1),(isHorizontal?Factor:1)>
|
||||
(_expression(),isVertical?factor:1,isHorizontal?factor:1);
|
||||
}
|
||||
|
||||
/////////// Artithmetic operators ///////////
|
||||
|
||||
/** Copies the vector \a other to each subvector of \c *this */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
ExpressionType& operator=(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
|
||||
//eigen_assert((m_matrix.isNull()) == (other.isNull())); FIXME
|
||||
return const_cast<ExpressionType&>(m_matrix = extendedTo(other.derived()));
|
||||
}
|
||||
|
||||
/** Adds the vector \a other to each subvector of \c *this */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
ExpressionType& operator+=(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
|
||||
return const_cast<ExpressionType&>(m_matrix += extendedTo(other.derived()));
|
||||
}
|
||||
|
||||
/** Substracts the vector \a other to each subvector of \c *this */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
ExpressionType& operator-=(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
|
||||
return const_cast<ExpressionType&>(m_matrix -= extendedTo(other.derived()));
|
||||
}
|
||||
|
||||
/** Multiples each subvector of \c *this by the vector \a other */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
ExpressionType& operator*=(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
|
||||
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
|
||||
m_matrix *= extendedTo(other.derived());
|
||||
return const_cast<ExpressionType&>(m_matrix);
|
||||
}
|
||||
|
||||
/** Divides each subvector of \c *this by the vector \a other */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
ExpressionType& operator/=(const DenseBase<OtherDerived>& other)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
|
||||
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
|
||||
m_matrix /= extendedTo(other.derived());
|
||||
return const_cast<ExpressionType&>(m_matrix);
|
||||
}
|
||||
|
||||
/** Returns the expression of the sum of the vector \a other to each subvector of \c *this */
|
||||
template<typename OtherDerived> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
|
||||
CwiseBinaryOp<internal::scalar_sum_op<Scalar,typename OtherDerived::Scalar>,
|
||||
const ExpressionTypeNestedCleaned,
|
||||
const typename ExtendedType<OtherDerived>::Type>
|
||||
operator+(const DenseBase<OtherDerived>& other) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
|
||||
return m_matrix + extendedTo(other.derived());
|
||||
}
|
||||
|
||||
/** Returns the expression of the difference between each subvector of \c *this and the vector \a other */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
CwiseBinaryOp<internal::scalar_difference_op<Scalar,typename OtherDerived::Scalar>,
|
||||
const ExpressionTypeNestedCleaned,
|
||||
const typename ExtendedType<OtherDerived>::Type>
|
||||
operator-(const DenseBase<OtherDerived>& other) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
|
||||
return m_matrix - extendedTo(other.derived());
|
||||
}
|
||||
|
||||
/** Returns the expression where each subvector is the product of the vector \a other
|
||||
* by the corresponding subvector of \c *this */
|
||||
template<typename OtherDerived> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
|
||||
CwiseBinaryOp<internal::scalar_product_op<Scalar>,
|
||||
const ExpressionTypeNestedCleaned,
|
||||
const typename ExtendedType<OtherDerived>::Type>
|
||||
EIGEN_DEVICE_FUNC
|
||||
operator*(const DenseBase<OtherDerived>& other) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
|
||||
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
|
||||
return m_matrix * extendedTo(other.derived());
|
||||
}
|
||||
|
||||
/** Returns the expression where each subvector is the quotient of the corresponding
|
||||
* subvector of \c *this by the vector \a other */
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
CwiseBinaryOp<internal::scalar_quotient_op<Scalar>,
|
||||
const ExpressionTypeNestedCleaned,
|
||||
const typename ExtendedType<OtherDerived>::Type>
|
||||
operator/(const DenseBase<OtherDerived>& other) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
|
||||
EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
|
||||
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
|
||||
return m_matrix / extendedTo(other.derived());
|
||||
}
|
||||
|
||||
/** \returns an expression where each column (or row) of the referenced matrix are normalized.
|
||||
* The referenced matrix is \b not modified.
|
||||
* \sa MatrixBase::normalized(), normalize()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
CwiseBinaryOp<internal::scalar_quotient_op<Scalar>,
|
||||
const ExpressionTypeNestedCleaned,
|
||||
const typename OppositeExtendedType<typename ReturnType<internal::member_norm,RealScalar>::Type>::Type>
|
||||
normalized() const { return m_matrix.cwiseQuotient(extendedToOpposite(this->norm())); }
|
||||
|
||||
|
||||
/** Normalize in-place each row or columns of the referenced matrix.
|
||||
* \sa MatrixBase::normalize(), normalized()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC void normalize() {
|
||||
m_matrix = this->normalized();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void reverseInPlace();
|
||||
|
||||
/////////// Geometry module ///////////
|
||||
|
||||
typedef Homogeneous<ExpressionType,Direction> HomogeneousReturnType;
|
||||
EIGEN_DEVICE_FUNC
|
||||
HomogeneousReturnType homogeneous() const;
|
||||
|
||||
typedef typename ExpressionType::PlainObject CrossReturnType;
|
||||
template<typename OtherDerived>
|
||||
EIGEN_DEVICE_FUNC
|
||||
const CrossReturnType cross(const MatrixBase<OtherDerived>& other) const;
|
||||
|
||||
enum {
|
||||
HNormalized_Size = Direction==Vertical ? internal::traits<ExpressionType>::RowsAtCompileTime
|
||||
: internal::traits<ExpressionType>::ColsAtCompileTime,
|
||||
HNormalized_SizeMinusOne = HNormalized_Size==Dynamic ? Dynamic : HNormalized_Size-1
|
||||
};
|
||||
typedef Block<const ExpressionType,
|
||||
Direction==Vertical ? int(HNormalized_SizeMinusOne)
|
||||
: int(internal::traits<ExpressionType>::RowsAtCompileTime),
|
||||
Direction==Horizontal ? int(HNormalized_SizeMinusOne)
|
||||
: int(internal::traits<ExpressionType>::ColsAtCompileTime)>
|
||||
HNormalized_Block;
|
||||
typedef Block<const ExpressionType,
|
||||
Direction==Vertical ? 1 : int(internal::traits<ExpressionType>::RowsAtCompileTime),
|
||||
Direction==Horizontal ? 1 : int(internal::traits<ExpressionType>::ColsAtCompileTime)>
|
||||
HNormalized_Factors;
|
||||
typedef CwiseBinaryOp<internal::scalar_quotient_op<typename internal::traits<ExpressionType>::Scalar>,
|
||||
const HNormalized_Block,
|
||||
const Replicate<HNormalized_Factors,
|
||||
Direction==Vertical ? HNormalized_SizeMinusOne : 1,
|
||||
Direction==Horizontal ? HNormalized_SizeMinusOne : 1> >
|
||||
HNormalizedReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const HNormalizedReturnType hnormalized() const;
|
||||
|
||||
protected:
|
||||
ExpressionTypeNested m_matrix;
|
||||
};
|
||||
|
||||
//const colwise moved to DenseBase.h due to CUDA compiler bug
|
||||
|
||||
|
||||
/** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations
|
||||
*
|
||||
* \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline typename DenseBase<Derived>::ColwiseReturnType
|
||||
DenseBase<Derived>::colwise()
|
||||
{
|
||||
return ColwiseReturnType(derived());
|
||||
}
|
||||
|
||||
//const rowwise moved to DenseBase.h due to CUDA compiler bug
|
||||
|
||||
|
||||
/** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations
|
||||
*
|
||||
* \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline typename DenseBase<Derived>::RowwiseReturnType
|
||||
DenseBase<Derived>::rowwise()
|
||||
{
|
||||
return RowwiseReturnType(derived());
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_PARTIAL_REDUX_H
|
||||
273
external/include/eigen3/Eigen/src/Core/Visitor.h
vendored
Normal file
273
external/include/eigen3/Eigen/src/Core/Visitor.h
vendored
Normal file
@@ -0,0 +1,273 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_VISITOR_H
|
||||
#define EIGEN_VISITOR_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Visitor, typename Derived, int UnrollCount>
|
||||
struct visitor_impl
|
||||
{
|
||||
enum {
|
||||
col = (UnrollCount-1) / Derived::RowsAtCompileTime,
|
||||
row = (UnrollCount-1) % Derived::RowsAtCompileTime
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(const Derived &mat, Visitor& visitor)
|
||||
{
|
||||
visitor_impl<Visitor, Derived, UnrollCount-1>::run(mat, visitor);
|
||||
visitor(mat.coeff(row, col), row, col);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Visitor, typename Derived>
|
||||
struct visitor_impl<Visitor, Derived, 1>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(const Derived &mat, Visitor& visitor)
|
||||
{
|
||||
return visitor.init(mat.coeff(0, 0), 0, 0);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Visitor, typename Derived>
|
||||
struct visitor_impl<Visitor, Derived, Dynamic>
|
||||
{
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(const Derived& mat, Visitor& visitor)
|
||||
{
|
||||
visitor.init(mat.coeff(0,0), 0, 0);
|
||||
for(Index i = 1; i < mat.rows(); ++i)
|
||||
visitor(mat.coeff(i, 0), i, 0);
|
||||
for(Index j = 1; j < mat.cols(); ++j)
|
||||
for(Index i = 0; i < mat.rows(); ++i)
|
||||
visitor(mat.coeff(i, j), i, j);
|
||||
}
|
||||
};
|
||||
|
||||
// evaluator adaptor
|
||||
template<typename XprType>
|
||||
class visitor_evaluator
|
||||
{
|
||||
public:
|
||||
EIGEN_DEVICE_FUNC
|
||||
explicit visitor_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {}
|
||||
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
|
||||
enum {
|
||||
RowsAtCompileTime = XprType::RowsAtCompileTime,
|
||||
CoeffReadCost = internal::evaluator<XprType>::CoeffReadCost
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
|
||||
EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
|
||||
EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
|
||||
{ return m_evaluator.coeff(row, col); }
|
||||
|
||||
protected:
|
||||
internal::evaluator<XprType> m_evaluator;
|
||||
const XprType &m_xpr;
|
||||
};
|
||||
} // end namespace internal
|
||||
|
||||
/** Applies the visitor \a visitor to the whole coefficients of the matrix or vector.
|
||||
*
|
||||
* The template parameter \a Visitor is the type of the visitor and provides the following interface:
|
||||
* \code
|
||||
* struct MyVisitor {
|
||||
* // called for the first coefficient
|
||||
* void init(const Scalar& value, Index i, Index j);
|
||||
* // called for all other coefficients
|
||||
* void operator() (const Scalar& value, Index i, Index j);
|
||||
* };
|
||||
* \endcode
|
||||
*
|
||||
* \note compared to one or two \em for \em loops, visitors offer automatic
|
||||
* unrolling for small fixed size matrix.
|
||||
*
|
||||
* \sa minCoeff(Index*,Index*), maxCoeff(Index*,Index*), DenseBase::redux()
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename Visitor>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void DenseBase<Derived>::visit(Visitor& visitor) const
|
||||
{
|
||||
typedef typename internal::visitor_evaluator<Derived> ThisEvaluator;
|
||||
ThisEvaluator thisEval(derived());
|
||||
|
||||
enum {
|
||||
unroll = SizeAtCompileTime != Dynamic
|
||||
&& SizeAtCompileTime * ThisEvaluator::CoeffReadCost + (SizeAtCompileTime-1) * internal::functor_traits<Visitor>::Cost <= EIGEN_UNROLLING_LIMIT
|
||||
};
|
||||
return internal::visitor_impl<Visitor, ThisEvaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(thisEval, visitor);
|
||||
}
|
||||
|
||||
namespace internal {
|
||||
|
||||
/** \internal
|
||||
* \brief Base class to implement min and max visitors
|
||||
*/
|
||||
template <typename Derived>
|
||||
struct coeff_visitor
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
Index row, col;
|
||||
Scalar res;
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline void init(const Scalar& value, Index i, Index j)
|
||||
{
|
||||
res = value;
|
||||
row = i;
|
||||
col = j;
|
||||
}
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Visitor computing the min coefficient with its value and coordinates
|
||||
*
|
||||
* \sa DenseBase::minCoeff(Index*, Index*)
|
||||
*/
|
||||
template <typename Derived>
|
||||
struct min_coeff_visitor : coeff_visitor<Derived>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
void operator() (const Scalar& value, Index i, Index j)
|
||||
{
|
||||
if(value < this->res)
|
||||
{
|
||||
this->res = value;
|
||||
this->row = i;
|
||||
this->col = j;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Scalar>
|
||||
struct functor_traits<min_coeff_visitor<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::AddCost
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Visitor computing the max coefficient with its value and coordinates
|
||||
*
|
||||
* \sa DenseBase::maxCoeff(Index*, Index*)
|
||||
*/
|
||||
template <typename Derived>
|
||||
struct max_coeff_visitor : coeff_visitor<Derived>
|
||||
{
|
||||
typedef typename Derived::Scalar Scalar;
|
||||
EIGEN_DEVICE_FUNC
|
||||
void operator() (const Scalar& value, Index i, Index j)
|
||||
{
|
||||
if(value > this->res)
|
||||
{
|
||||
this->res = value;
|
||||
this->row = i;
|
||||
this->col = j;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Scalar>
|
||||
struct functor_traits<max_coeff_visitor<Scalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<Scalar>::AddCost
|
||||
};
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/** \fn DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
|
||||
* \returns the minimum of all coefficients of *this and puts in *row and *col its location.
|
||||
* \warning the result is undefined if \c *this contains NaN.
|
||||
*
|
||||
* \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visit(), DenseBase::minCoeff()
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename IndexType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename internal::traits<Derived>::Scalar
|
||||
DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
|
||||
{
|
||||
internal::min_coeff_visitor<Derived> minVisitor;
|
||||
this->visit(minVisitor);
|
||||
*rowId = minVisitor.row;
|
||||
if (colId) *colId = minVisitor.col;
|
||||
return minVisitor.res;
|
||||
}
|
||||
|
||||
/** \returns the minimum of all coefficients of *this and puts in *index its location.
|
||||
* \warning the result is undefined if \c *this contains NaN.
|
||||
*
|
||||
* \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::minCoeff()
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename IndexType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename internal::traits<Derived>::Scalar
|
||||
DenseBase<Derived>::minCoeff(IndexType* index) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
internal::min_coeff_visitor<Derived> minVisitor;
|
||||
this->visit(minVisitor);
|
||||
*index = IndexType((RowsAtCompileTime==1) ? minVisitor.col : minVisitor.row);
|
||||
return minVisitor.res;
|
||||
}
|
||||
|
||||
/** \fn DenseBase<Derived>::maxCoeff(IndexType* rowId, IndexType* colId) const
|
||||
* \returns the maximum of all coefficients of *this and puts in *row and *col its location.
|
||||
* \warning the result is undefined if \c *this contains NaN.
|
||||
*
|
||||
* \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::maxCoeff()
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename IndexType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename internal::traits<Derived>::Scalar
|
||||
DenseBase<Derived>::maxCoeff(IndexType* rowPtr, IndexType* colPtr) const
|
||||
{
|
||||
internal::max_coeff_visitor<Derived> maxVisitor;
|
||||
this->visit(maxVisitor);
|
||||
*rowPtr = maxVisitor.row;
|
||||
if (colPtr) *colPtr = maxVisitor.col;
|
||||
return maxVisitor.res;
|
||||
}
|
||||
|
||||
/** \returns the maximum of all coefficients of *this and puts in *index its location.
|
||||
* \warning the result is undefined if \c *this contains NaN.
|
||||
*
|
||||
* \sa DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::maxCoeff()
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename IndexType>
|
||||
EIGEN_DEVICE_FUNC
|
||||
typename internal::traits<Derived>::Scalar
|
||||
DenseBase<Derived>::maxCoeff(IndexType* index) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
internal::max_coeff_visitor<Derived> maxVisitor;
|
||||
this->visit(maxVisitor);
|
||||
*index = (RowsAtCompileTime==1) ? maxVisitor.col : maxVisitor.row;
|
||||
return maxVisitor.res;
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_VISITOR_H
|
||||
451
external/include/eigen3/Eigen/src/Core/arch/AVX/Complex.h
vendored
Normal file
451
external/include/eigen3/Eigen/src/Core/arch/AVX/Complex.h
vendored
Normal file
@@ -0,0 +1,451 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2014 Benoit Steiner (benoit.steiner.goog@gmail.com)
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_COMPLEX_AVX_H
|
||||
#define EIGEN_COMPLEX_AVX_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
//---------- float ----------
|
||||
struct Packet4cf
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet4cf() {}
|
||||
EIGEN_STRONG_INLINE explicit Packet4cf(const __m256& a) : v(a) {}
|
||||
__m256 v;
|
||||
};
|
||||
|
||||
template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
{
|
||||
typedef Packet4cf type;
|
||||
typedef Packet2cf half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = 4,
|
||||
HasHalfPacket = 1,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasNegate = 1,
|
||||
HasAbs = 0,
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
HasSetLinear = 0
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet4cf> { typedef std::complex<float> type; enum {size=4, alignment=Aligned32}; typedef Packet2cf half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf padd<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_add_ps(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf psub<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_sub_ps(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pnegate(const Packet4cf& a)
|
||||
{
|
||||
return Packet4cf(pnegate(a.v));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pconj(const Packet4cf& a)
|
||||
{
|
||||
const __m256 mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000));
|
||||
return Packet4cf(_mm256_xor_ps(a.v,mask));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pmul<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
|
||||
{
|
||||
__m256 tmp1 = _mm256_mul_ps(_mm256_moveldup_ps(a.v), b.v);
|
||||
__m256 tmp2 = _mm256_mul_ps(_mm256_movehdup_ps(a.v), _mm256_permute_ps(b.v, _MM_SHUFFLE(2,3,0,1)));
|
||||
__m256 result = _mm256_addsub_ps(tmp1, tmp2);
|
||||
return Packet4cf(result);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pand <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_and_ps(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf por <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_or_ps(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pxor <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_xor_ps(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pandnot<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_andnot_ps(a.v,b.v)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pload <Packet4cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet4cf(pload<Packet8f>(&numext::real_ref(*from))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf ploadu<Packet4cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet4cf(ploadu<Packet8f>(&numext::real_ref(*from))); }
|
||||
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pset1<Packet4cf>(const std::complex<float>& from)
|
||||
{
|
||||
return Packet4cf(_mm256_castpd_ps(_mm256_broadcast_sd((const double*)(const void*)&from)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf ploaddup<Packet4cf>(const std::complex<float>* from)
|
||||
{
|
||||
// FIXME The following might be optimized using _mm256_movedup_pd
|
||||
Packet2cf a = ploaddup<Packet2cf>(from);
|
||||
Packet2cf b = ploaddup<Packet2cf>(from+1);
|
||||
return Packet4cf(_mm256_insertf128_ps(_mm256_castps128_ps256(a.v), b.v, 1));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), from.v); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4cf pgather<std::complex<float>, Packet4cf>(const std::complex<float>* from, Index stride)
|
||||
{
|
||||
return Packet4cf(_mm256_set_ps(std::imag(from[3*stride]), std::real(from[3*stride]),
|
||||
std::imag(from[2*stride]), std::real(from[2*stride]),
|
||||
std::imag(from[1*stride]), std::real(from[1*stride]),
|
||||
std::imag(from[0*stride]), std::real(from[0*stride])));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet4cf>(std::complex<float>* to, const Packet4cf& from, Index stride)
|
||||
{
|
||||
__m128 low = _mm256_extractf128_ps(from.v, 0);
|
||||
to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 0)),
|
||||
_mm_cvtss_f32(_mm_shuffle_ps(low, low, 1)));
|
||||
to[stride*1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 2)),
|
||||
_mm_cvtss_f32(_mm_shuffle_ps(low, low, 3)));
|
||||
|
||||
__m128 high = _mm256_extractf128_ps(from.v, 1);
|
||||
to[stride*2] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 0)),
|
||||
_mm_cvtss_f32(_mm_shuffle_ps(high, high, 1)));
|
||||
to[stride*3] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 2)),
|
||||
_mm_cvtss_f32(_mm_shuffle_ps(high, high, 3)));
|
||||
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet4cf>(const Packet4cf& a)
|
||||
{
|
||||
return pfirst(Packet2cf(_mm256_castps256_ps128(a.v)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf preverse(const Packet4cf& a) {
|
||||
__m128 low = _mm256_extractf128_ps(a.v, 0);
|
||||
__m128 high = _mm256_extractf128_ps(a.v, 1);
|
||||
__m128d lowd = _mm_castps_pd(low);
|
||||
__m128d highd = _mm_castps_pd(high);
|
||||
low = _mm_castpd_ps(_mm_shuffle_pd(lowd,lowd,0x1));
|
||||
high = _mm_castpd_ps(_mm_shuffle_pd(highd,highd,0x1));
|
||||
__m256 result = _mm256_setzero_ps();
|
||||
result = _mm256_insertf128_ps(result, low, 1);
|
||||
result = _mm256_insertf128_ps(result, high, 0);
|
||||
return Packet4cf(result);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet4cf>(const Packet4cf& a)
|
||||
{
|
||||
return predux(padd(Packet2cf(_mm256_extractf128_ps(a.v,0)),
|
||||
Packet2cf(_mm256_extractf128_ps(a.v,1))));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf preduxp<Packet4cf>(const Packet4cf* vecs)
|
||||
{
|
||||
Packet8f t0 = _mm256_shuffle_ps(vecs[0].v, vecs[0].v, _MM_SHUFFLE(3, 1, 2 ,0));
|
||||
Packet8f t1 = _mm256_shuffle_ps(vecs[1].v, vecs[1].v, _MM_SHUFFLE(3, 1, 2 ,0));
|
||||
t0 = _mm256_hadd_ps(t0,t1);
|
||||
Packet8f t2 = _mm256_shuffle_ps(vecs[2].v, vecs[2].v, _MM_SHUFFLE(3, 1, 2 ,0));
|
||||
Packet8f t3 = _mm256_shuffle_ps(vecs[3].v, vecs[3].v, _MM_SHUFFLE(3, 1, 2 ,0));
|
||||
t2 = _mm256_hadd_ps(t2,t3);
|
||||
|
||||
t1 = _mm256_permute2f128_ps(t0,t2, 0 + (2<<4));
|
||||
t3 = _mm256_permute2f128_ps(t0,t2, 1 + (3<<4));
|
||||
|
||||
return Packet4cf(_mm256_add_ps(t1,t3));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet4cf>(const Packet4cf& a)
|
||||
{
|
||||
return predux_mul(pmul(Packet2cf(_mm256_extractf128_ps(a.v, 0)),
|
||||
Packet2cf(_mm256_extractf128_ps(a.v, 1))));
|
||||
}
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet4cf>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet4cf& first, const Packet4cf& second)
|
||||
{
|
||||
if (Offset==0) return;
|
||||
palign_impl<Offset*2,Packet8f>::run(first.v, second.v);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet4cf, Packet4cf, false,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
|
||||
{
|
||||
return internal::pmul(a, pconj(b));
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet4cf, Packet4cf, true,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
|
||||
{
|
||||
return internal::pmul(pconj(a), b);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet4cf, Packet4cf, true,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
|
||||
{
|
||||
return pconj(internal::pmul(a, b));
|
||||
}
|
||||
};
|
||||
|
||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cf,Packet8f)
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
|
||||
{
|
||||
Packet4cf num = pmul(a, pconj(b));
|
||||
__m256 tmp = _mm256_mul_ps(b.v, b.v);
|
||||
__m256 tmp2 = _mm256_shuffle_ps(tmp,tmp,0xB1);
|
||||
__m256 denom = _mm256_add_ps(tmp, tmp2);
|
||||
return Packet4cf(_mm256_div_ps(num.v, denom));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pcplxflip<Packet4cf>(const Packet4cf& x)
|
||||
{
|
||||
return Packet4cf(_mm256_shuffle_ps(x.v, x.v, _MM_SHUFFLE(2, 3, 0 ,1)));
|
||||
}
|
||||
|
||||
//---------- double ----------
|
||||
struct Packet2cd
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cd() {}
|
||||
EIGEN_STRONG_INLINE explicit Packet2cd(const __m256d& a) : v(a) {}
|
||||
__m256d v;
|
||||
};
|
||||
|
||||
template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
||||
{
|
||||
typedef Packet2cd type;
|
||||
typedef Packet1cd half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 0,
|
||||
size = 2,
|
||||
HasHalfPacket = 1,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasNegate = 1,
|
||||
HasAbs = 0,
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
HasSetLinear = 0
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet2cd> { typedef std::complex<double> type; enum {size=2, alignment=Aligned32}; typedef Packet1cd half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd padd<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_add_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd psub<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_sub_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pnegate(const Packet2cd& a) { return Packet2cd(pnegate(a.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pconj(const Packet2cd& a)
|
||||
{
|
||||
const __m256d mask = _mm256_castsi256_pd(_mm256_set_epi32(0x80000000,0x0,0x0,0x0,0x80000000,0x0,0x0,0x0));
|
||||
return Packet2cd(_mm256_xor_pd(a.v,mask));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pmul<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
|
||||
{
|
||||
__m256d tmp1 = _mm256_shuffle_pd(a.v,a.v,0x0);
|
||||
__m256d even = _mm256_mul_pd(tmp1, b.v);
|
||||
__m256d tmp2 = _mm256_shuffle_pd(a.v,a.v,0xF);
|
||||
__m256d tmp3 = _mm256_shuffle_pd(b.v,b.v,0x5);
|
||||
__m256d odd = _mm256_mul_pd(tmp2, tmp3);
|
||||
return Packet2cd(_mm256_addsub_pd(even, odd));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pand <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_and_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd por <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_or_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pxor <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_xor_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pandnot<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_andnot_pd(a.v,b.v)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pload <Packet2cd>(const std::complex<double>* from)
|
||||
{ EIGEN_DEBUG_ALIGNED_LOAD return Packet2cd(pload<Packet4d>((const double*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd ploadu<Packet2cd>(const std::complex<double>* from)
|
||||
{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cd(ploadu<Packet4d>((const double*)from)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pset1<Packet2cd>(const std::complex<double>& from)
|
||||
{
|
||||
// in case casting to a __m128d* is really not safe, then we can still fallback to this version: (much slower though)
|
||||
// return Packet2cd(_mm256_loadu2_m128d((const double*)&from,(const double*)&from));
|
||||
return Packet2cd(_mm256_broadcast_pd((const __m128d*)(const void*)&from));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd ploaddup<Packet2cd>(const std::complex<double>* from) { return pset1<Packet2cd>(*from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet2cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet2cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2cd pgather<std::complex<double>, Packet2cd>(const std::complex<double>* from, Index stride)
|
||||
{
|
||||
return Packet2cd(_mm256_set_pd(std::imag(from[1*stride]), std::real(from[1*stride]),
|
||||
std::imag(from[0*stride]), std::real(from[0*stride])));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet2cd>(std::complex<double>* to, const Packet2cd& from, Index stride)
|
||||
{
|
||||
__m128d low = _mm256_extractf128_pd(from.v, 0);
|
||||
to[stride*0] = std::complex<double>(_mm_cvtsd_f64(low), _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1)));
|
||||
__m128d high = _mm256_extractf128_pd(from.v, 1);
|
||||
to[stride*1] = std::complex<double>(_mm_cvtsd_f64(high), _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet2cd>(const Packet2cd& a)
|
||||
{
|
||||
__m128d low = _mm256_extractf128_pd(a.v, 0);
|
||||
EIGEN_ALIGN16 double res[2];
|
||||
_mm_store_pd(res, low);
|
||||
return std::complex<double>(res[0],res[1]);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd preverse(const Packet2cd& a) {
|
||||
__m256d result = _mm256_permute2f128_pd(a.v, a.v, 1);
|
||||
return Packet2cd(result);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet2cd>(const Packet2cd& a)
|
||||
{
|
||||
return predux(padd(Packet1cd(_mm256_extractf128_pd(a.v,0)),
|
||||
Packet1cd(_mm256_extractf128_pd(a.v,1))));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd preduxp<Packet2cd>(const Packet2cd* vecs)
|
||||
{
|
||||
Packet4d t0 = _mm256_permute2f128_pd(vecs[0].v,vecs[1].v, 0 + (2<<4));
|
||||
Packet4d t1 = _mm256_permute2f128_pd(vecs[0].v,vecs[1].v, 1 + (3<<4));
|
||||
|
||||
return Packet2cd(_mm256_add_pd(t0,t1));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet2cd>(const Packet2cd& a)
|
||||
{
|
||||
return predux(pmul(Packet1cd(_mm256_extractf128_pd(a.v,0)),
|
||||
Packet1cd(_mm256_extractf128_pd(a.v,1))));
|
||||
}
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet2cd>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet2cd& first, const Packet2cd& second)
|
||||
{
|
||||
if (Offset==0) return;
|
||||
palign_impl<Offset*2,Packet4d>::run(first.v, second.v);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cd, Packet2cd, false,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
|
||||
{
|
||||
return internal::pmul(a, pconj(b));
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cd, Packet2cd, true,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
|
||||
{
|
||||
return internal::pmul(pconj(a), b);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cd, Packet2cd, true,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
|
||||
{
|
||||
return pconj(internal::pmul(a, b));
|
||||
}
|
||||
};
|
||||
|
||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cd,Packet4d)
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pdiv<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
|
||||
{
|
||||
Packet2cd num = pmul(a, pconj(b));
|
||||
__m256d tmp = _mm256_mul_pd(b.v, b.v);
|
||||
__m256d denom = _mm256_hadd_pd(tmp, tmp);
|
||||
return Packet2cd(_mm256_div_pd(num.v, denom));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pcplxflip<Packet2cd>(const Packet2cd& x)
|
||||
{
|
||||
return Packet2cd(_mm256_shuffle_pd(x.v, x.v, 0x5));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet4cf,4>& kernel) {
|
||||
__m256d P0 = _mm256_castps_pd(kernel.packet[0].v);
|
||||
__m256d P1 = _mm256_castps_pd(kernel.packet[1].v);
|
||||
__m256d P2 = _mm256_castps_pd(kernel.packet[2].v);
|
||||
__m256d P3 = _mm256_castps_pd(kernel.packet[3].v);
|
||||
|
||||
__m256d T0 = _mm256_shuffle_pd(P0, P1, 15);
|
||||
__m256d T1 = _mm256_shuffle_pd(P0, P1, 0);
|
||||
__m256d T2 = _mm256_shuffle_pd(P2, P3, 15);
|
||||
__m256d T3 = _mm256_shuffle_pd(P2, P3, 0);
|
||||
|
||||
kernel.packet[1].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 32));
|
||||
kernel.packet[3].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 49));
|
||||
kernel.packet[0].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 32));
|
||||
kernel.packet[2].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 49));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet2cd,2>& kernel) {
|
||||
__m256d tmp = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 0+(2<<4));
|
||||
kernel.packet[1].v = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 1+(3<<4));
|
||||
kernel.packet[0].v = tmp;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pinsertfirst(const Packet4cf& a, std::complex<float> b)
|
||||
{
|
||||
return Packet4cf(_mm256_blend_ps(a.v,pset1<Packet4cf>(b).v,1|2));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pinsertfirst(const Packet2cd& a, std::complex<double> b)
|
||||
{
|
||||
return Packet2cd(_mm256_blend_pd(a.v,pset1<Packet2cd>(b).v,1|2));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4cf pinsertlast(const Packet4cf& a, std::complex<float> b)
|
||||
{
|
||||
return Packet4cf(_mm256_blend_ps(a.v,pset1<Packet4cf>(b).v,(1<<7)|(1<<6)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cd pinsertlast(const Packet2cd& a, std::complex<double> b)
|
||||
{
|
||||
return Packet2cd(_mm256_blend_pd(a.v,pset1<Packet2cd>(b).v,(1<<3)|(1<<2)));
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_COMPLEX_AVX_H
|
||||
439
external/include/eigen3/Eigen/src/Core/arch/AVX/MathFunctions.h
vendored
Normal file
439
external/include/eigen3/Eigen/src/Core/arch/AVX/MathFunctions.h
vendored
Normal file
@@ -0,0 +1,439 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com)
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_MATH_FUNCTIONS_AVX_H
|
||||
#define EIGEN_MATH_FUNCTIONS_AVX_H
|
||||
|
||||
/* The sin, cos, exp, and log functions of this file are loosely derived from
|
||||
* Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
|
||||
*/
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
inline Packet8i pshiftleft(Packet8i v, int n)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_AVX2
|
||||
return _mm256_slli_epi32(v, n);
|
||||
#else
|
||||
__m128i lo = _mm_slli_epi32(_mm256_extractf128_si256(v, 0), n);
|
||||
__m128i hi = _mm_slli_epi32(_mm256_extractf128_si256(v, 1), n);
|
||||
return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
|
||||
#endif
|
||||
}
|
||||
|
||||
inline Packet8f pshiftright(Packet8f v, int n)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_AVX2
|
||||
return _mm256_cvtepi32_ps(_mm256_srli_epi32(_mm256_castps_si256(v), n));
|
||||
#else
|
||||
__m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 0), n);
|
||||
__m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 1), n);
|
||||
return _mm256_cvtepi32_ps(_mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1));
|
||||
#endif
|
||||
}
|
||||
|
||||
// Sine function
|
||||
// Computes sin(x) by wrapping x to the interval [-Pi/4,3*Pi/4] and
|
||||
// evaluating interpolants in [-Pi/4,Pi/4] or [Pi/4,3*Pi/4]. The interpolants
|
||||
// are (anti-)symmetric and thus have only odd/even coefficients
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
|
||||
psin<Packet8f>(const Packet8f& _x) {
|
||||
Packet8f x = _x;
|
||||
|
||||
// Some useful values.
|
||||
_EIGEN_DECLARE_CONST_Packet8i(one, 1);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(one, 1.0f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(two, 2.0f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(one_over_four, 0.25f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(one_over_pi, 3.183098861837907e-01f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(neg_pi_first, -3.140625000000000e+00f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(neg_pi_second, -9.670257568359375e-04f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(neg_pi_third, -6.278329571784980e-07f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(four_over_pi, 1.273239544735163e+00f);
|
||||
|
||||
// Map x from [-Pi/4,3*Pi/4] to z in [-1,3] and subtract the shifted period.
|
||||
Packet8f z = pmul(x, p8f_one_over_pi);
|
||||
Packet8f shift = _mm256_floor_ps(padd(z, p8f_one_over_four));
|
||||
x = pmadd(shift, p8f_neg_pi_first, x);
|
||||
x = pmadd(shift, p8f_neg_pi_second, x);
|
||||
x = pmadd(shift, p8f_neg_pi_third, x);
|
||||
z = pmul(x, p8f_four_over_pi);
|
||||
|
||||
// Make a mask for the entries that need flipping, i.e. wherever the shift
|
||||
// is odd.
|
||||
Packet8i shift_ints = _mm256_cvtps_epi32(shift);
|
||||
Packet8i shift_isodd = _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(shift_ints), _mm256_castsi256_ps(p8i_one)));
|
||||
Packet8i sign_flip_mask = pshiftleft(shift_isodd, 31);
|
||||
|
||||
// Create a mask for which interpolant to use, i.e. if z > 1, then the mask
|
||||
// is set to ones for that entry.
|
||||
Packet8f ival_mask = _mm256_cmp_ps(z, p8f_one, _CMP_GT_OQ);
|
||||
|
||||
// Evaluate the polynomial for the interval [1,3] in z.
|
||||
_EIGEN_DECLARE_CONST_Packet8f(coeff_right_0, 9.999999724233232e-01f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(coeff_right_2, -3.084242535619928e-01f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(coeff_right_4, 1.584991525700324e-02f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(coeff_right_6, -3.188805084631342e-04f);
|
||||
Packet8f z_minus_two = psub(z, p8f_two);
|
||||
Packet8f z_minus_two2 = pmul(z_minus_two, z_minus_two);
|
||||
Packet8f right = pmadd(p8f_coeff_right_6, z_minus_two2, p8f_coeff_right_4);
|
||||
right = pmadd(right, z_minus_two2, p8f_coeff_right_2);
|
||||
right = pmadd(right, z_minus_two2, p8f_coeff_right_0);
|
||||
|
||||
// Evaluate the polynomial for the interval [-1,1] in z.
|
||||
_EIGEN_DECLARE_CONST_Packet8f(coeff_left_1, 7.853981525427295e-01f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(coeff_left_3, -8.074536727092352e-02f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(coeff_left_5, 2.489871967827018e-03f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(coeff_left_7, -3.587725841214251e-05f);
|
||||
Packet8f z2 = pmul(z, z);
|
||||
Packet8f left = pmadd(p8f_coeff_left_7, z2, p8f_coeff_left_5);
|
||||
left = pmadd(left, z2, p8f_coeff_left_3);
|
||||
left = pmadd(left, z2, p8f_coeff_left_1);
|
||||
left = pmul(left, z);
|
||||
|
||||
// Assemble the results, i.e. select the left and right polynomials.
|
||||
left = _mm256_andnot_ps(ival_mask, left);
|
||||
right = _mm256_and_ps(ival_mask, right);
|
||||
Packet8f res = _mm256_or_ps(left, right);
|
||||
|
||||
// Flip the sign on the odd intervals and return the result.
|
||||
res = _mm256_xor_ps(res, _mm256_castsi256_ps(sign_flip_mask));
|
||||
return res;
|
||||
}
|
||||
|
||||
// Natural logarithm
|
||||
// Computes log(x) as log(2^e * m) = C*e + log(m), where the constant C =log(2)
|
||||
// and m is in the range [sqrt(1/2),sqrt(2)). In this range, the logarithm can
|
||||
// be easily approximated by a polynomial centered on m=1 for stability.
|
||||
// TODO(gonnet): Further reduce the interval allowing for lower-degree
|
||||
// polynomial interpolants -> ... -> profit!
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
|
||||
plog<Packet8f>(const Packet8f& _x) {
|
||||
Packet8f x = _x;
|
||||
_EIGEN_DECLARE_CONST_Packet8f(1, 1.0f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(half, 0.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(126f, 126.0f);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inv_mant_mask, ~0x7f800000);
|
||||
|
||||
// The smallest non denormalized float number.
|
||||
_EIGEN_DECLARE_CONST_Packet8f_FROM_INT(min_norm_pos, 0x00800000);
|
||||
_EIGEN_DECLARE_CONST_Packet8f_FROM_INT(minus_inf, 0xff800000);
|
||||
|
||||
// Polynomial coefficients.
|
||||
_EIGEN_DECLARE_CONST_Packet8f(cephes_SQRTHF, 0.707106781186547524f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(cephes_log_p0, 7.0376836292E-2f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(cephes_log_p1, -1.1514610310E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(cephes_log_p2, 1.1676998740E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(cephes_log_p3, -1.2420140846E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(cephes_log_p4, +1.4249322787E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(cephes_log_p5, -1.6668057665E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(cephes_log_p6, +2.0000714765E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(cephes_log_p7, -2.4999993993E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(cephes_log_p8, +3.3333331174E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(cephes_log_q1, -2.12194440e-4f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(cephes_log_q2, 0.693359375f);
|
||||
|
||||
Packet8f invalid_mask = _mm256_cmp_ps(x, _mm256_setzero_ps(), _CMP_NGE_UQ); // not greater equal is true if x is NaN
|
||||
Packet8f iszero_mask = _mm256_cmp_ps(x, _mm256_setzero_ps(), _CMP_EQ_OQ);
|
||||
|
||||
// Truncate input values to the minimum positive normal.
|
||||
x = pmax(x, p8f_min_norm_pos);
|
||||
|
||||
Packet8f emm0 = pshiftright(x,23);
|
||||
Packet8f e = _mm256_sub_ps(emm0, p8f_126f);
|
||||
|
||||
// Set the exponents to -1, i.e. x are in the range [0.5,1).
|
||||
x = _mm256_and_ps(x, p8f_inv_mant_mask);
|
||||
x = _mm256_or_ps(x, p8f_half);
|
||||
|
||||
// part2: Shift the inputs from the range [0.5,1) to [sqrt(1/2),sqrt(2))
|
||||
// and shift by -1. The values are then centered around 0, which improves
|
||||
// the stability of the polynomial evaluation.
|
||||
// if( x < SQRTHF ) {
|
||||
// e -= 1;
|
||||
// x = x + x - 1.0;
|
||||
// } else { x = x - 1.0; }
|
||||
Packet8f mask = _mm256_cmp_ps(x, p8f_cephes_SQRTHF, _CMP_LT_OQ);
|
||||
Packet8f tmp = _mm256_and_ps(x, mask);
|
||||
x = psub(x, p8f_1);
|
||||
e = psub(e, _mm256_and_ps(p8f_1, mask));
|
||||
x = padd(x, tmp);
|
||||
|
||||
Packet8f x2 = pmul(x, x);
|
||||
Packet8f x3 = pmul(x2, x);
|
||||
|
||||
// Evaluate the polynomial approximant of degree 8 in three parts, probably
|
||||
// to improve instruction-level parallelism.
|
||||
Packet8f y, y1, y2;
|
||||
y = pmadd(p8f_cephes_log_p0, x, p8f_cephes_log_p1);
|
||||
y1 = pmadd(p8f_cephes_log_p3, x, p8f_cephes_log_p4);
|
||||
y2 = pmadd(p8f_cephes_log_p6, x, p8f_cephes_log_p7);
|
||||
y = pmadd(y, x, p8f_cephes_log_p2);
|
||||
y1 = pmadd(y1, x, p8f_cephes_log_p5);
|
||||
y2 = pmadd(y2, x, p8f_cephes_log_p8);
|
||||
y = pmadd(y, x3, y1);
|
||||
y = pmadd(y, x3, y2);
|
||||
y = pmul(y, x3);
|
||||
|
||||
// Add the logarithm of the exponent back to the result of the interpolation.
|
||||
y1 = pmul(e, p8f_cephes_log_q1);
|
||||
tmp = pmul(x2, p8f_half);
|
||||
y = padd(y, y1);
|
||||
x = psub(x, tmp);
|
||||
y2 = pmul(e, p8f_cephes_log_q2);
|
||||
x = padd(x, y);
|
||||
x = padd(x, y2);
|
||||
|
||||
// Filter out invalid inputs, i.e. negative arg will be NAN, 0 will be -INF.
|
||||
return _mm256_or_ps(
|
||||
_mm256_andnot_ps(iszero_mask, _mm256_or_ps(x, invalid_mask)),
|
||||
_mm256_and_ps(iszero_mask, p8f_minus_inf));
|
||||
}
|
||||
|
||||
// Exponential function. Works by writing "x = m*log(2) + r" where
|
||||
// "m = floor(x/log(2)+1/2)" and "r" is the remainder. The result is then
|
||||
// "exp(x) = 2^m*exp(r)" where exp(r) is in the range [-1,1).
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
|
||||
pexp<Packet8f>(const Packet8f& _x) {
|
||||
_EIGEN_DECLARE_CONST_Packet8f(1, 1.0f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(half, 0.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(127, 127.0f);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet8f(exp_hi, 88.3762626647950f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(exp_lo, -88.3762626647949f);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet8f(cephes_LOG2EF, 1.44269504088896341f);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p0, 1.9875691500E-4f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p1, 1.3981999507E-3f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p2, 8.3334519073E-3f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p3, 4.1665795894E-2f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p4, 1.6666665459E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p5, 5.0000001201E-1f);
|
||||
|
||||
// Clamp x.
|
||||
Packet8f x = pmax(pmin(_x, p8f_exp_hi), p8f_exp_lo);
|
||||
|
||||
// Express exp(x) as exp(m*ln(2) + r), start by extracting
|
||||
// m = floor(x/ln(2) + 0.5).
|
||||
Packet8f m = _mm256_floor_ps(pmadd(x, p8f_cephes_LOG2EF, p8f_half));
|
||||
|
||||
// Get r = x - m*ln(2). If no FMA instructions are available, m*ln(2) is
|
||||
// subtracted out in two parts, m*C1+m*C2 = m*ln(2), to avoid accumulating
|
||||
// truncation errors. Note that we don't use the "pmadd" function here to
|
||||
// ensure that a precision-preserving FMA instruction is used.
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
_EIGEN_DECLARE_CONST_Packet8f(nln2, -0.6931471805599453f);
|
||||
Packet8f r = _mm256_fmadd_ps(m, p8f_nln2, x);
|
||||
#else
|
||||
_EIGEN_DECLARE_CONST_Packet8f(cephes_exp_C1, 0.693359375f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(cephes_exp_C2, -2.12194440e-4f);
|
||||
Packet8f r = psub(x, pmul(m, p8f_cephes_exp_C1));
|
||||
r = psub(r, pmul(m, p8f_cephes_exp_C2));
|
||||
#endif
|
||||
|
||||
Packet8f r2 = pmul(r, r);
|
||||
|
||||
// TODO(gonnet): Split into odd/even polynomials and try to exploit
|
||||
// instruction-level parallelism.
|
||||
Packet8f y = p8f_cephes_exp_p0;
|
||||
y = pmadd(y, r, p8f_cephes_exp_p1);
|
||||
y = pmadd(y, r, p8f_cephes_exp_p2);
|
||||
y = pmadd(y, r, p8f_cephes_exp_p3);
|
||||
y = pmadd(y, r, p8f_cephes_exp_p4);
|
||||
y = pmadd(y, r, p8f_cephes_exp_p5);
|
||||
y = pmadd(y, r2, r);
|
||||
y = padd(y, p8f_1);
|
||||
|
||||
// Build emm0 = 2^m.
|
||||
Packet8i emm0 = _mm256_cvttps_epi32(padd(m, p8f_127));
|
||||
emm0 = pshiftleft(emm0, 23);
|
||||
|
||||
// Return 2^m * exp(r).
|
||||
return pmax(pmul(y, _mm256_castsi256_ps(emm0)), _x);
|
||||
}
|
||||
|
||||
// Hyperbolic Tangent function.
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
|
||||
ptanh<Packet8f>(const Packet8f& x) {
|
||||
return internal::generic_fast_tanh_float(x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d
|
||||
pexp<Packet4d>(const Packet4d& _x) {
|
||||
Packet4d x = _x;
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4d(1, 1.0);
|
||||
_EIGEN_DECLARE_CONST_Packet4d(2, 2.0);
|
||||
_EIGEN_DECLARE_CONST_Packet4d(half, 0.5);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4d(exp_hi, 709.437);
|
||||
_EIGEN_DECLARE_CONST_Packet4d(exp_lo, -709.436139303);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4d(cephes_LOG2EF, 1.4426950408889634073599);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p0, 1.26177193074810590878e-4);
|
||||
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p1, 3.02994407707441961300e-2);
|
||||
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p2, 9.99999999999999999910e-1);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q0, 3.00198505138664455042e-6);
|
||||
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q1, 2.52448340349684104192e-3);
|
||||
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q2, 2.27265548208155028766e-1);
|
||||
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q3, 2.00000000000000000009e0);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_C1, 0.693145751953125);
|
||||
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_C2, 1.42860682030941723212e-6);
|
||||
_EIGEN_DECLARE_CONST_Packet4i(1023, 1023);
|
||||
|
||||
Packet4d tmp, fx;
|
||||
|
||||
// clamp x
|
||||
x = pmax(pmin(x, p4d_exp_hi), p4d_exp_lo);
|
||||
// Express exp(x) as exp(g + n*log(2)).
|
||||
fx = pmadd(p4d_cephes_LOG2EF, x, p4d_half);
|
||||
|
||||
// Get the integer modulus of log(2), i.e. the "n" described above.
|
||||
fx = _mm256_floor_pd(fx);
|
||||
|
||||
// Get the remainder modulo log(2), i.e. the "g" described above. Subtract
|
||||
// n*log(2) out in two steps, i.e. n*C1 + n*C2, C1+C2=log2 to get the last
|
||||
// digits right.
|
||||
tmp = pmul(fx, p4d_cephes_exp_C1);
|
||||
Packet4d z = pmul(fx, p4d_cephes_exp_C2);
|
||||
x = psub(x, tmp);
|
||||
x = psub(x, z);
|
||||
|
||||
Packet4d x2 = pmul(x, x);
|
||||
|
||||
// Evaluate the numerator polynomial of the rational interpolant.
|
||||
Packet4d px = p4d_cephes_exp_p0;
|
||||
px = pmadd(px, x2, p4d_cephes_exp_p1);
|
||||
px = pmadd(px, x2, p4d_cephes_exp_p2);
|
||||
px = pmul(px, x);
|
||||
|
||||
// Evaluate the denominator polynomial of the rational interpolant.
|
||||
Packet4d qx = p4d_cephes_exp_q0;
|
||||
qx = pmadd(qx, x2, p4d_cephes_exp_q1);
|
||||
qx = pmadd(qx, x2, p4d_cephes_exp_q2);
|
||||
qx = pmadd(qx, x2, p4d_cephes_exp_q3);
|
||||
|
||||
// I don't really get this bit, copied from the SSE2 routines, so...
|
||||
// TODO(gonnet): Figure out what is going on here, perhaps find a better
|
||||
// rational interpolant?
|
||||
x = _mm256_div_pd(px, psub(qx, px));
|
||||
x = pmadd(p4d_2, x, p4d_1);
|
||||
|
||||
// Build e=2^n by constructing the exponents in a 128-bit vector and
|
||||
// shifting them to where they belong in double-precision values.
|
||||
__m128i emm0 = _mm256_cvtpd_epi32(fx);
|
||||
emm0 = _mm_add_epi32(emm0, p4i_1023);
|
||||
emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(3, 1, 2, 0));
|
||||
__m128i lo = _mm_slli_epi64(emm0, 52);
|
||||
__m128i hi = _mm_slli_epi64(_mm_srli_epi64(emm0, 32), 52);
|
||||
__m256i e = _mm256_insertf128_si256(_mm256_setzero_si256(), lo, 0);
|
||||
e = _mm256_insertf128_si256(e, hi, 1);
|
||||
|
||||
// Construct the result 2^n * exp(g) = e * x. The max is used to catch
|
||||
// non-finite values in the input.
|
||||
return pmax(pmul(x, _mm256_castsi256_pd(e)), _x);
|
||||
}
|
||||
|
||||
// Functions for sqrt.
|
||||
// The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step
|
||||
// of Newton's method, at a cost of 1-2 bits of precision as opposed to the
|
||||
// exact solution. It does not handle +inf, or denormalized numbers correctly.
|
||||
// The main advantage of this approach is not just speed, but also the fact that
|
||||
// it can be inlined and pipelined with other computations, further reducing its
|
||||
// effective latency. This is similar to Quake3's fast inverse square root.
|
||||
// For detail see here: http://www.beyond3d.com/content/articles/8/
|
||||
#if EIGEN_FAST_MATH
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
|
||||
psqrt<Packet8f>(const Packet8f& _x) {
|
||||
Packet8f half = pmul(_x, pset1<Packet8f>(.5f));
|
||||
Packet8f denormal_mask = _mm256_and_ps(
|
||||
_mm256_cmp_ps(_x, pset1<Packet8f>((std::numeric_limits<float>::min)()),
|
||||
_CMP_LT_OQ),
|
||||
_mm256_cmp_ps(_x, _mm256_setzero_ps(), _CMP_GE_OQ));
|
||||
|
||||
// Compute approximate reciprocal sqrt.
|
||||
Packet8f x = _mm256_rsqrt_ps(_x);
|
||||
// Do a single step of Newton's iteration.
|
||||
x = pmul(x, psub(pset1<Packet8f>(1.5f), pmul(half, pmul(x,x))));
|
||||
// Flush results for denormals to zero.
|
||||
return _mm256_andnot_ps(denormal_mask, pmul(_x,x));
|
||||
}
|
||||
#else
|
||||
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet8f psqrt<Packet8f>(const Packet8f& x) {
|
||||
return _mm256_sqrt_ps(x);
|
||||
}
|
||||
#endif
|
||||
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4d psqrt<Packet4d>(const Packet4d& x) {
|
||||
return _mm256_sqrt_pd(x);
|
||||
}
|
||||
#if EIGEN_FAST_MATH
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet8f prsqrt<Packet8f>(const Packet8f& _x) {
|
||||
_EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inf, 0x7f800000);
|
||||
_EIGEN_DECLARE_CONST_Packet8f_FROM_INT(nan, 0x7fc00000);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(one_point_five, 1.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f(minus_half, -0.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet8f_FROM_INT(flt_min, 0x00800000);
|
||||
|
||||
Packet8f neg_half = pmul(_x, p8f_minus_half);
|
||||
|
||||
// select only the inverse sqrt of positive normal inputs (denormals are
|
||||
// flushed to zero and cause infs as well).
|
||||
Packet8f le_zero_mask = _mm256_cmp_ps(_x, p8f_flt_min, _CMP_LT_OQ);
|
||||
Packet8f x = _mm256_andnot_ps(le_zero_mask, _mm256_rsqrt_ps(_x));
|
||||
|
||||
// Fill in NaNs and Infs for the negative/zero entries.
|
||||
Packet8f neg_mask = _mm256_cmp_ps(_x, _mm256_setzero_ps(), _CMP_LT_OQ);
|
||||
Packet8f zero_mask = _mm256_andnot_ps(neg_mask, le_zero_mask);
|
||||
Packet8f infs_and_nans = _mm256_or_ps(_mm256_and_ps(neg_mask, p8f_nan),
|
||||
_mm256_and_ps(zero_mask, p8f_inf));
|
||||
|
||||
// Do a single step of Newton's iteration.
|
||||
x = pmul(x, pmadd(neg_half, pmul(x, x), p8f_one_point_five));
|
||||
|
||||
// Insert NaNs and Infs in all the right places.
|
||||
return _mm256_or_ps(x, infs_and_nans);
|
||||
}
|
||||
|
||||
#else
|
||||
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet8f prsqrt<Packet8f>(const Packet8f& x) {
|
||||
_EIGEN_DECLARE_CONST_Packet8f(one, 1.0f);
|
||||
return _mm256_div_ps(p8f_one, _mm256_sqrt_ps(x));
|
||||
}
|
||||
#endif
|
||||
|
||||
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4d prsqrt<Packet4d>(const Packet4d& x) {
|
||||
_EIGEN_DECLARE_CONST_Packet4d(one, 1.0);
|
||||
return _mm256_div_pd(p4d_one, _mm256_sqrt_pd(x));
|
||||
}
|
||||
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_MATH_FUNCTIONS_AVX_H
|
||||
637
external/include/eigen3/Eigen/src/Core/arch/AVX/PacketMath.h
vendored
Normal file
637
external/include/eigen3/Eigen/src/Core/arch/AVX/PacketMath.h
vendored
Normal file
@@ -0,0 +1,637 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2014 Benoit Steiner (benoit.steiner.goog@gmail.com)
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_PACKET_MATH_AVX_H
|
||||
#define EIGEN_PACKET_MATH_AVX_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
|
||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
|
||||
#endif
|
||||
|
||||
#ifdef __FMA__
|
||||
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
||||
#endif
|
||||
#endif
|
||||
|
||||
typedef __m256 Packet8f;
|
||||
typedef __m256i Packet8i;
|
||||
typedef __m256d Packet4d;
|
||||
|
||||
template<> struct is_arithmetic<__m256> { enum { value = true }; };
|
||||
template<> struct is_arithmetic<__m256i> { enum { value = true }; };
|
||||
template<> struct is_arithmetic<__m256d> { enum { value = true }; };
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet8f(NAME,X) \
|
||||
const Packet8f p8f_##NAME = pset1<Packet8f>(X)
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet4d(NAME,X) \
|
||||
const Packet4d p4d_##NAME = pset1<Packet4d>(X)
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(NAME,X) \
|
||||
const Packet8f p8f_##NAME = _mm256_castsi256_ps(pset1<Packet8i>(X))
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet8i(NAME,X) \
|
||||
const Packet8i p8i_##NAME = pset1<Packet8i>(X)
|
||||
|
||||
// Use the packet_traits defined in AVX512/PacketMath.h instead if we're going
|
||||
// to leverage AVX512 instructions.
|
||||
#ifndef EIGEN_VECTORIZE_AVX512
|
||||
template<> struct packet_traits<float> : default_packet_traits
|
||||
{
|
||||
typedef Packet8f type;
|
||||
typedef Packet4f half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=8,
|
||||
HasHalfPacket = 1,
|
||||
|
||||
HasDiv = 1,
|
||||
HasSin = EIGEN_FAST_MATH,
|
||||
HasCos = 0,
|
||||
HasLog = 1,
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasTanh = EIGEN_FAST_MATH,
|
||||
HasBlend = 1,
|
||||
HasRound = 1,
|
||||
HasFloor = 1,
|
||||
HasCeil = 1
|
||||
};
|
||||
};
|
||||
template<> struct packet_traits<double> : default_packet_traits
|
||||
{
|
||||
typedef Packet4d type;
|
||||
typedef Packet2d half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=4,
|
||||
HasHalfPacket = 1,
|
||||
|
||||
HasDiv = 1,
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasBlend = 1,
|
||||
HasRound = 1,
|
||||
HasFloor = 1,
|
||||
HasCeil = 1
|
||||
};
|
||||
};
|
||||
#endif
|
||||
|
||||
template<> struct scalar_div_cost<float,true> { enum { value = 14 }; };
|
||||
template<> struct scalar_div_cost<double,true> { enum { value = 16 }; };
|
||||
|
||||
/* Proper support for integers is only provided by AVX2. In the meantime, we'll
|
||||
use SSE instructions and packets to deal with integers.
|
||||
template<> struct packet_traits<int> : default_packet_traits
|
||||
{
|
||||
typedef Packet8i type;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=8
|
||||
};
|
||||
};
|
||||
*/
|
||||
|
||||
template<> struct unpacket_traits<Packet8f> { typedef float type; typedef Packet4f half; enum {size=8, alignment=Aligned32}; };
|
||||
template<> struct unpacket_traits<Packet4d> { typedef double type; typedef Packet2d half; enum {size=4, alignment=Aligned32}; };
|
||||
template<> struct unpacket_traits<Packet8i> { typedef int type; typedef Packet4i half; enum {size=8, alignment=Aligned32}; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pset1<Packet8f>(const float& from) { return _mm256_set1_ps(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pset1<Packet4d>(const double& from) { return _mm256_set1_pd(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet8i pset1<Packet8i>(const int& from) { return _mm256_set1_epi32(from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pload1<Packet8f>(const float* from) { return _mm256_broadcast_ss(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pload1<Packet4d>(const double* from) { return _mm256_broadcast_sd(from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f plset<Packet8f>(const float& a) { return _mm256_add_ps(_mm256_set1_ps(a), _mm256_set_ps(7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d plset<Packet4d>(const double& a) { return _mm256_add_pd(_mm256_set1_pd(a), _mm256_set_pd(3.0,2.0,1.0,0.0)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f padd<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_add_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d padd<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_add_pd(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f psub<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_sub_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d psub<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_sub_pd(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pnegate(const Packet8f& a)
|
||||
{
|
||||
return _mm256_sub_ps(_mm256_set1_ps(0.0),a);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pnegate(const Packet4d& a)
|
||||
{
|
||||
return _mm256_sub_pd(_mm256_set1_pd(0.0),a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pconj(const Packet8f& a) { return a; }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pconj(const Packet4d& a) { return a; }
|
||||
template<> EIGEN_STRONG_INLINE Packet8i pconj(const Packet8i& a) { return a; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pmul<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_mul_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pmul<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_mul_pd(a,b); }
|
||||
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pdiv<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_div_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pdiv<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_div_pd(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, const Packet8i& /*b*/)
|
||||
{ eigen_assert(false && "packet integer division are not supported by AVX");
|
||||
return pset1<Packet8i>(0);
|
||||
}
|
||||
|
||||
#ifdef __FMA__
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
|
||||
#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) )
|
||||
// Clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers,
|
||||
// and even register spilling with clang>=6.0 (bug 1637).
|
||||
// Gcc stupidly generates a vfmadd132ps instruction.
|
||||
// So let's enforce it to generate a vfmadd231ps instruction since the most common use
|
||||
// case is to accumulate the result of the product.
|
||||
Packet8f res = c;
|
||||
__asm__("vfmadd231ps %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
|
||||
return res;
|
||||
#else
|
||||
return _mm256_fmadd_ps(a,b,c);
|
||||
#endif
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
|
||||
#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) )
|
||||
// see above
|
||||
Packet4d res = c;
|
||||
__asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
|
||||
return res;
|
||||
#else
|
||||
return _mm256_fmadd_pd(a,b,c);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pmin<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_min_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pmin<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_min_pd(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pmax<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_max_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pmax<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_max_pd(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pround<Packet8f>(const Packet8f& a) { return _mm256_round_ps(a, _MM_FROUND_CUR_DIRECTION); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pround<Packet4d>(const Packet4d& a) { return _mm256_round_pd(a, _MM_FROUND_CUR_DIRECTION); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pceil<Packet8f>(const Packet8f& a) { return _mm256_ceil_ps(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pceil<Packet4d>(const Packet4d& a) { return _mm256_ceil_pd(a); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pfloor<Packet8f>(const Packet8f& a) { return _mm256_floor_ps(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pfloor<Packet4d>(const Packet4d& a) { return _mm256_floor_pd(a); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pand<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_and_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pand<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_and_pd(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f por<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_or_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d por<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_or_pd(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pxor<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_xor_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pxor<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_xor_pd(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pandnot<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_andnot_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pandnot<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_andnot_pd(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pload<Packet8f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_ps(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pload<Packet4d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_pd(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet8i pload<Packet8i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_si256(reinterpret_cast<const __m256i*>(from)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f ploadu<Packet8f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_ps(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d ploadu<Packet4d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_pd(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet8i ploadu<Packet8i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from)); }
|
||||
|
||||
// Loads 4 floats from memory a returns the packet {a0, a0 a1, a1, a2, a2, a3, a3}
|
||||
template<> EIGEN_STRONG_INLINE Packet8f ploaddup<Packet8f>(const float* from)
|
||||
{
|
||||
// TODO try to find a way to avoid the need of a temporary register
|
||||
// Packet8f tmp = _mm256_castps128_ps256(_mm_loadu_ps(from));
|
||||
// tmp = _mm256_insertf128_ps(tmp, _mm_movehl_ps(_mm256_castps256_ps128(tmp),_mm256_castps256_ps128(tmp)), 1);
|
||||
// return _mm256_unpacklo_ps(tmp,tmp);
|
||||
|
||||
// _mm256_insertf128_ps is very slow on Haswell, thus:
|
||||
Packet8f tmp = _mm256_broadcast_ps((const __m128*)(const void*)from);
|
||||
// mimic an "inplace" permutation of the lower 128bits using a blend
|
||||
tmp = _mm256_blend_ps(tmp,_mm256_castps128_ps256(_mm_permute_ps( _mm256_castps256_ps128(tmp), _MM_SHUFFLE(1,0,1,0))), 15);
|
||||
// then we can perform a consistent permutation on the global register to get everything in shape:
|
||||
return _mm256_permute_ps(tmp, _MM_SHUFFLE(3,3,2,2));
|
||||
}
|
||||
// Loads 2 doubles from memory a returns the packet {a0, a0 a1, a1}
|
||||
template<> EIGEN_STRONG_INLINE Packet4d ploaddup<Packet4d>(const double* from)
|
||||
{
|
||||
Packet4d tmp = _mm256_broadcast_pd((const __m128d*)(const void*)from);
|
||||
return _mm256_permute_pd(tmp, 3<<2);
|
||||
}
|
||||
|
||||
// Loads 2 floats from memory a returns the packet {a0, a0 a0, a0, a1, a1, a1, a1}
|
||||
template<> EIGEN_STRONG_INLINE Packet8f ploadquad<Packet8f>(const float* from)
|
||||
{
|
||||
Packet8f tmp = _mm256_castps128_ps256(_mm_broadcast_ss(from));
|
||||
return _mm256_insertf128_ps(tmp, _mm_broadcast_ss(from+1), 1);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet8f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_ps(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet4d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_pd(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet8i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet8f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_ps(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet4d& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_pd(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet8i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); }
|
||||
|
||||
// NOTE: leverage _mm256_i32gather_ps and _mm256_i32gather_pd if AVX2 instructions are available
|
||||
// NOTE: for the record the following seems to be slower: return _mm256_i32gather_ps(from, _mm256_set1_epi32(stride), 4);
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet8f pgather<float, Packet8f>(const float* from, Index stride)
|
||||
{
|
||||
return _mm256_set_ps(from[7*stride], from[6*stride], from[5*stride], from[4*stride],
|
||||
from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4d pgather<double, Packet4d>(const double* from, Index stride)
|
||||
{
|
||||
return _mm256_set_pd(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet8f>(float* to, const Packet8f& from, Index stride)
|
||||
{
|
||||
__m128 low = _mm256_extractf128_ps(from, 0);
|
||||
to[stride*0] = _mm_cvtss_f32(low);
|
||||
to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 1));
|
||||
to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 2));
|
||||
to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 3));
|
||||
|
||||
__m128 high = _mm256_extractf128_ps(from, 1);
|
||||
to[stride*4] = _mm_cvtss_f32(high);
|
||||
to[stride*5] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 1));
|
||||
to[stride*6] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 2));
|
||||
to[stride*7] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3));
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet4d>(double* to, const Packet4d& from, Index stride)
|
||||
{
|
||||
__m128d low = _mm256_extractf128_pd(from, 0);
|
||||
to[stride*0] = _mm_cvtsd_f64(low);
|
||||
to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1));
|
||||
__m128d high = _mm256_extractf128_pd(from, 1);
|
||||
to[stride*2] = _mm_cvtsd_f64(high);
|
||||
to[stride*3] = _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore1<Packet8f>(float* to, const float& a)
|
||||
{
|
||||
Packet8f pa = pset1<Packet8f>(a);
|
||||
pstore(to, pa);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE void pstore1<Packet4d>(double* to, const double& a)
|
||||
{
|
||||
Packet4d pa = pset1<Packet4d>(a);
|
||||
pstore(to, pa);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE void pstore1<Packet8i>(int* to, const int& a)
|
||||
{
|
||||
Packet8i pa = pset1<Packet8i>(a);
|
||||
pstore(to, pa);
|
||||
}
|
||||
|
||||
#ifndef EIGEN_VECTORIZE_AVX512
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
|
||||
#endif
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float pfirst<Packet8f>(const Packet8f& a) {
|
||||
return _mm_cvtss_f32(_mm256_castps256_ps128(a));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE double pfirst<Packet4d>(const Packet4d& a) {
|
||||
return _mm_cvtsd_f64(_mm256_castpd256_pd128(a));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE int pfirst<Packet8i>(const Packet8i& a) {
|
||||
return _mm_cvtsi128_si32(_mm256_castsi256_si128(a));
|
||||
}
|
||||
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f preverse(const Packet8f& a)
|
||||
{
|
||||
__m256 tmp = _mm256_shuffle_ps(a,a,0x1b);
|
||||
return _mm256_permute2f128_ps(tmp, tmp, 1);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4d preverse(const Packet4d& a)
|
||||
{
|
||||
__m256d tmp = _mm256_shuffle_pd(a,a,5);
|
||||
return _mm256_permute2f128_pd(tmp, tmp, 1);
|
||||
#if 0
|
||||
// This version is unlikely to be faster as _mm256_shuffle_ps and _mm256_permute_pd
|
||||
// exhibit the same latency/throughput, but it is here for future reference/benchmarking...
|
||||
__m256d swap_halves = _mm256_permute2f128_pd(a,a,1);
|
||||
return _mm256_permute_pd(swap_halves,5);
|
||||
#endif
|
||||
}
|
||||
|
||||
// pabs should be ok
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pabs(const Packet8f& a)
|
||||
{
|
||||
const Packet8f mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
|
||||
return _mm256_and_ps(a,mask);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pabs(const Packet4d& a)
|
||||
{
|
||||
const Packet4d mask = _mm256_castsi256_pd(_mm256_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
|
||||
return _mm256_and_pd(a,mask);
|
||||
}
|
||||
|
||||
// preduxp should be ok
|
||||
// FIXME: why is this ok? why isn't the simply implementation working as expected?
|
||||
template<> EIGEN_STRONG_INLINE Packet8f preduxp<Packet8f>(const Packet8f* vecs)
|
||||
{
|
||||
__m256 hsum1 = _mm256_hadd_ps(vecs[0], vecs[1]);
|
||||
__m256 hsum2 = _mm256_hadd_ps(vecs[2], vecs[3]);
|
||||
__m256 hsum3 = _mm256_hadd_ps(vecs[4], vecs[5]);
|
||||
__m256 hsum4 = _mm256_hadd_ps(vecs[6], vecs[7]);
|
||||
|
||||
__m256 hsum5 = _mm256_hadd_ps(hsum1, hsum1);
|
||||
__m256 hsum6 = _mm256_hadd_ps(hsum2, hsum2);
|
||||
__m256 hsum7 = _mm256_hadd_ps(hsum3, hsum3);
|
||||
__m256 hsum8 = _mm256_hadd_ps(hsum4, hsum4);
|
||||
|
||||
__m256 perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
|
||||
__m256 perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
|
||||
__m256 perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
|
||||
__m256 perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
|
||||
|
||||
__m256 sum1 = _mm256_add_ps(perm1, hsum5);
|
||||
__m256 sum2 = _mm256_add_ps(perm2, hsum6);
|
||||
__m256 sum3 = _mm256_add_ps(perm3, hsum7);
|
||||
__m256 sum4 = _mm256_add_ps(perm4, hsum8);
|
||||
|
||||
__m256 blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
|
||||
__m256 blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
|
||||
|
||||
__m256 final = _mm256_blend_ps(blend1, blend2, 0xf0);
|
||||
return final;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4d preduxp<Packet4d>(const Packet4d* vecs)
|
||||
{
|
||||
Packet4d tmp0, tmp1;
|
||||
|
||||
tmp0 = _mm256_hadd_pd(vecs[0], vecs[1]);
|
||||
tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
|
||||
|
||||
tmp1 = _mm256_hadd_pd(vecs[2], vecs[3]);
|
||||
tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
|
||||
|
||||
return _mm256_blend_pd(tmp0, tmp1, 0xC);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float predux<Packet8f>(const Packet8f& a)
|
||||
{
|
||||
return predux(Packet4f(_mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1))));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE double predux<Packet4d>(const Packet4d& a)
|
||||
{
|
||||
return predux(Packet2d(_mm_add_pd(_mm256_castpd256_pd128(a),_mm256_extractf128_pd(a,1))));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f predux_downto4<Packet8f>(const Packet8f& a)
|
||||
{
|
||||
return _mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float predux_mul<Packet8f>(const Packet8f& a)
|
||||
{
|
||||
Packet8f tmp;
|
||||
tmp = _mm256_mul_ps(a, _mm256_permute2f128_ps(a,a,1));
|
||||
tmp = _mm256_mul_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
|
||||
return pfirst(_mm256_mul_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE double predux_mul<Packet4d>(const Packet4d& a)
|
||||
{
|
||||
Packet4d tmp;
|
||||
tmp = _mm256_mul_pd(a, _mm256_permute2f128_pd(a,a,1));
|
||||
return pfirst(_mm256_mul_pd(tmp, _mm256_shuffle_pd(tmp,tmp,1)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float predux_min<Packet8f>(const Packet8f& a)
|
||||
{
|
||||
Packet8f tmp = _mm256_min_ps(a, _mm256_permute2f128_ps(a,a,1));
|
||||
tmp = _mm256_min_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
|
||||
return pfirst(_mm256_min_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE double predux_min<Packet4d>(const Packet4d& a)
|
||||
{
|
||||
Packet4d tmp = _mm256_min_pd(a, _mm256_permute2f128_pd(a,a,1));
|
||||
return pfirst(_mm256_min_pd(tmp, _mm256_shuffle_pd(tmp, tmp, 1)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float predux_max<Packet8f>(const Packet8f& a)
|
||||
{
|
||||
Packet8f tmp = _mm256_max_ps(a, _mm256_permute2f128_ps(a,a,1));
|
||||
tmp = _mm256_max_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
|
||||
return pfirst(_mm256_max_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE double predux_max<Packet4d>(const Packet4d& a)
|
||||
{
|
||||
Packet4d tmp = _mm256_max_pd(a, _mm256_permute2f128_pd(a,a,1));
|
||||
return pfirst(_mm256_max_pd(tmp, _mm256_shuffle_pd(tmp, tmp, 1)));
|
||||
}
|
||||
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet8f>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet8f& first, const Packet8f& second)
|
||||
{
|
||||
if (Offset==1)
|
||||
{
|
||||
first = _mm256_blend_ps(first, second, 1);
|
||||
Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
|
||||
Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
|
||||
first = _mm256_blend_ps(tmp1, tmp2, 0x88);
|
||||
}
|
||||
else if (Offset==2)
|
||||
{
|
||||
first = _mm256_blend_ps(first, second, 3);
|
||||
Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
|
||||
Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
|
||||
first = _mm256_blend_ps(tmp1, tmp2, 0xcc);
|
||||
}
|
||||
else if (Offset==3)
|
||||
{
|
||||
first = _mm256_blend_ps(first, second, 7);
|
||||
Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
|
||||
Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
|
||||
first = _mm256_blend_ps(tmp1, tmp2, 0xee);
|
||||
}
|
||||
else if (Offset==4)
|
||||
{
|
||||
first = _mm256_blend_ps(first, second, 15);
|
||||
Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(3,2,1,0));
|
||||
Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
|
||||
first = _mm256_permute_ps(tmp2, _MM_SHUFFLE(3,2,1,0));
|
||||
}
|
||||
else if (Offset==5)
|
||||
{
|
||||
first = _mm256_blend_ps(first, second, 31);
|
||||
first = _mm256_permute2f128_ps(first, first, 1);
|
||||
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
|
||||
first = _mm256_permute2f128_ps(tmp, tmp, 1);
|
||||
first = _mm256_blend_ps(tmp, first, 0x88);
|
||||
}
|
||||
else if (Offset==6)
|
||||
{
|
||||
first = _mm256_blend_ps(first, second, 63);
|
||||
first = _mm256_permute2f128_ps(first, first, 1);
|
||||
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
|
||||
first = _mm256_permute2f128_ps(tmp, tmp, 1);
|
||||
first = _mm256_blend_ps(tmp, first, 0xcc);
|
||||
}
|
||||
else if (Offset==7)
|
||||
{
|
||||
first = _mm256_blend_ps(first, second, 127);
|
||||
first = _mm256_permute2f128_ps(first, first, 1);
|
||||
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
|
||||
first = _mm256_permute2f128_ps(tmp, tmp, 1);
|
||||
first = _mm256_blend_ps(tmp, first, 0xee);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet4d>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet4d& first, const Packet4d& second)
|
||||
{
|
||||
if (Offset==1)
|
||||
{
|
||||
first = _mm256_blend_pd(first, second, 1);
|
||||
__m256d tmp = _mm256_permute_pd(first, 5);
|
||||
first = _mm256_permute2f128_pd(tmp, tmp, 1);
|
||||
first = _mm256_blend_pd(tmp, first, 0xA);
|
||||
}
|
||||
else if (Offset==2)
|
||||
{
|
||||
first = _mm256_blend_pd(first, second, 3);
|
||||
first = _mm256_permute2f128_pd(first, first, 1);
|
||||
}
|
||||
else if (Offset==3)
|
||||
{
|
||||
first = _mm256_blend_pd(first, second, 7);
|
||||
__m256d tmp = _mm256_permute_pd(first, 5);
|
||||
first = _mm256_permute2f128_pd(tmp, tmp, 1);
|
||||
first = _mm256_blend_pd(tmp, first, 5);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet8f,8>& kernel) {
|
||||
__m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
|
||||
__m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
|
||||
__m256 T2 = _mm256_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
|
||||
__m256 T3 = _mm256_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
|
||||
__m256 T4 = _mm256_unpacklo_ps(kernel.packet[4], kernel.packet[5]);
|
||||
__m256 T5 = _mm256_unpackhi_ps(kernel.packet[4], kernel.packet[5]);
|
||||
__m256 T6 = _mm256_unpacklo_ps(kernel.packet[6], kernel.packet[7]);
|
||||
__m256 T7 = _mm256_unpackhi_ps(kernel.packet[6], kernel.packet[7]);
|
||||
__m256 S0 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(1,0,1,0));
|
||||
__m256 S1 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(3,2,3,2));
|
||||
__m256 S2 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(1,0,1,0));
|
||||
__m256 S3 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(3,2,3,2));
|
||||
__m256 S4 = _mm256_shuffle_ps(T4,T6,_MM_SHUFFLE(1,0,1,0));
|
||||
__m256 S5 = _mm256_shuffle_ps(T4,T6,_MM_SHUFFLE(3,2,3,2));
|
||||
__m256 S6 = _mm256_shuffle_ps(T5,T7,_MM_SHUFFLE(1,0,1,0));
|
||||
__m256 S7 = _mm256_shuffle_ps(T5,T7,_MM_SHUFFLE(3,2,3,2));
|
||||
kernel.packet[0] = _mm256_permute2f128_ps(S0, S4, 0x20);
|
||||
kernel.packet[1] = _mm256_permute2f128_ps(S1, S5, 0x20);
|
||||
kernel.packet[2] = _mm256_permute2f128_ps(S2, S6, 0x20);
|
||||
kernel.packet[3] = _mm256_permute2f128_ps(S3, S7, 0x20);
|
||||
kernel.packet[4] = _mm256_permute2f128_ps(S0, S4, 0x31);
|
||||
kernel.packet[5] = _mm256_permute2f128_ps(S1, S5, 0x31);
|
||||
kernel.packet[6] = _mm256_permute2f128_ps(S2, S6, 0x31);
|
||||
kernel.packet[7] = _mm256_permute2f128_ps(S3, S7, 0x31);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet8f,4>& kernel) {
|
||||
__m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
|
||||
__m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
|
||||
__m256 T2 = _mm256_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
|
||||
__m256 T3 = _mm256_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
|
||||
|
||||
__m256 S0 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(1,0,1,0));
|
||||
__m256 S1 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(3,2,3,2));
|
||||
__m256 S2 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(1,0,1,0));
|
||||
__m256 S3 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(3,2,3,2));
|
||||
|
||||
kernel.packet[0] = _mm256_permute2f128_ps(S0, S1, 0x20);
|
||||
kernel.packet[1] = _mm256_permute2f128_ps(S2, S3, 0x20);
|
||||
kernel.packet[2] = _mm256_permute2f128_ps(S0, S1, 0x31);
|
||||
kernel.packet[3] = _mm256_permute2f128_ps(S2, S3, 0x31);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet4d,4>& kernel) {
|
||||
__m256d T0 = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 15);
|
||||
__m256d T1 = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 0);
|
||||
__m256d T2 = _mm256_shuffle_pd(kernel.packet[2], kernel.packet[3], 15);
|
||||
__m256d T3 = _mm256_shuffle_pd(kernel.packet[2], kernel.packet[3], 0);
|
||||
|
||||
kernel.packet[1] = _mm256_permute2f128_pd(T0, T2, 32);
|
||||
kernel.packet[3] = _mm256_permute2f128_pd(T0, T2, 49);
|
||||
kernel.packet[0] = _mm256_permute2f128_pd(T1, T3, 32);
|
||||
kernel.packet[2] = _mm256_permute2f128_pd(T1, T3, 49);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pblend(const Selector<8>& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket) {
|
||||
const __m256 zero = _mm256_setzero_ps();
|
||||
const __m256 select = _mm256_set_ps(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4], ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
|
||||
__m256 false_mask = _mm256_cmp_ps(select, zero, _CMP_EQ_UQ);
|
||||
return _mm256_blendv_ps(thenPacket, elsePacket, false_mask);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, const Packet4d& thenPacket, const Packet4d& elsePacket) {
|
||||
const __m256d zero = _mm256_setzero_pd();
|
||||
const __m256d select = _mm256_set_pd(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
|
||||
__m256d false_mask = _mm256_cmp_pd(select, zero, _CMP_EQ_UQ);
|
||||
return _mm256_blendv_pd(thenPacket, elsePacket, false_mask);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pinsertfirst(const Packet8f& a, float b)
|
||||
{
|
||||
return _mm256_blend_ps(a,pset1<Packet8f>(b),1);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pinsertfirst(const Packet4d& a, double b)
|
||||
{
|
||||
return _mm256_blend_pd(a,pset1<Packet4d>(b),1);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pinsertlast(const Packet8f& a, float b)
|
||||
{
|
||||
return _mm256_blend_ps(a,pset1<Packet8f>(b),(1<<7));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pinsertlast(const Packet4d& a, double b)
|
||||
{
|
||||
return _mm256_blend_pd(a,pset1<Packet4d>(b),(1<<3));
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_PACKET_MATH_AVX_H
|
||||
51
external/include/eigen3/Eigen/src/Core/arch/AVX/TypeCasting.h
vendored
Normal file
51
external/include/eigen3/Eigen/src/Core/arch/AVX/TypeCasting.h
vendored
Normal file
@@ -0,0 +1,51 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_TYPE_CASTING_AVX_H
|
||||
#define EIGEN_TYPE_CASTING_AVX_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
// For now we use SSE to handle integers, so we can't use AVX instructions to cast
|
||||
// from int to float
|
||||
template <>
|
||||
struct type_casting_traits<float, int> {
|
||||
enum {
|
||||
VectorizedCast = 0,
|
||||
SrcCoeffRatio = 1,
|
||||
TgtCoeffRatio = 1
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<int, float> {
|
||||
enum {
|
||||
VectorizedCast = 0,
|
||||
SrcCoeffRatio = 1,
|
||||
TgtCoeffRatio = 1
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8i pcast<Packet8f, Packet8i>(const Packet8f& a) {
|
||||
return _mm256_cvtps_epi32(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8i, Packet8f>(const Packet8i& a) {
|
||||
return _mm256_cvtepi32_ps(a);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_TYPE_CASTING_AVX_H
|
||||
391
external/include/eigen3/Eigen/src/Core/arch/AVX512/MathFunctions.h
vendored
Normal file
391
external/include/eigen3/Eigen/src/Core/arch/AVX512/MathFunctions.h
vendored
Normal file
@@ -0,0 +1,391 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2016 Pedro Gonnet (pedro.gonnet@gmail.com)
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
|
||||
#define THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
// Disable the code for older versions of gcc that don't support many of the required avx512 instrinsics.
|
||||
#if EIGEN_GNUC_AT_LEAST(5, 3)
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet16f(NAME, X) \
|
||||
const Packet16f p16f_##NAME = pset1<Packet16f>(X)
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(NAME, X) \
|
||||
const Packet16f p16f_##NAME = (__m512)pset1<Packet16i>(X)
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet8d(NAME, X) \
|
||||
const Packet8d p8d_##NAME = pset1<Packet8d>(X)
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(NAME, X) \
|
||||
const Packet8d p8d_##NAME = _mm512_castsi512_pd(_mm512_set1_epi64(X))
|
||||
|
||||
// Natural logarithm
|
||||
// Computes log(x) as log(2^e * m) = C*e + log(m), where the constant C =log(2)
|
||||
// and m is in the range [sqrt(1/2),sqrt(2)). In this range, the logarithm can
|
||||
// be easily approximated by a polynomial centered on m=1 for stability.
|
||||
#if defined(EIGEN_VECTORIZE_AVX512DQ)
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
|
||||
plog<Packet16f>(const Packet16f& _x) {
|
||||
Packet16f x = _x;
|
||||
_EIGEN_DECLARE_CONST_Packet16f(1, 1.0f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(half, 0.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(126f, 126.0f);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(inv_mant_mask, ~0x7f800000);
|
||||
|
||||
// The smallest non denormalized float number.
|
||||
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(min_norm_pos, 0x00800000);
|
||||
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(minus_inf, 0xff800000);
|
||||
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(nan, 0x7fc00000);
|
||||
|
||||
// Polynomial coefficients.
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_SQRTHF, 0.707106781186547524f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p0, 7.0376836292E-2f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p1, -1.1514610310E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p2, 1.1676998740E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p3, -1.2420140846E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p4, +1.4249322787E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p5, -1.6668057665E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p6, +2.0000714765E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p7, -2.4999993993E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_p8, +3.3333331174E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_q1, -2.12194440e-4f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_log_q2, 0.693359375f);
|
||||
|
||||
// invalid_mask is set to true when x is NaN
|
||||
__mmask16 invalid_mask =
|
||||
_mm512_cmp_ps_mask(x, _mm512_setzero_ps(), _CMP_NGE_UQ);
|
||||
__mmask16 iszero_mask =
|
||||
_mm512_cmp_ps_mask(x, _mm512_setzero_ps(), _CMP_EQ_UQ);
|
||||
|
||||
// Truncate input values to the minimum positive normal.
|
||||
x = pmax(x, p16f_min_norm_pos);
|
||||
|
||||
// Extract the shifted exponents.
|
||||
Packet16f emm0 = _mm512_cvtepi32_ps(_mm512_srli_epi32((__m512i)x, 23));
|
||||
Packet16f e = _mm512_sub_ps(emm0, p16f_126f);
|
||||
|
||||
// Set the exponents to -1, i.e. x are in the range [0.5,1).
|
||||
x = _mm512_and_ps(x, p16f_inv_mant_mask);
|
||||
x = _mm512_or_ps(x, p16f_half);
|
||||
|
||||
// part2: Shift the inputs from the range [0.5,1) to [sqrt(1/2),sqrt(2))
|
||||
// and shift by -1. The values are then centered around 0, which improves
|
||||
// the stability of the polynomial evaluation.
|
||||
// if( x < SQRTHF ) {
|
||||
// e -= 1;
|
||||
// x = x + x - 1.0;
|
||||
// } else { x = x - 1.0; }
|
||||
__mmask16 mask = _mm512_cmp_ps_mask(x, p16f_cephes_SQRTHF, _CMP_LT_OQ);
|
||||
Packet16f tmp = _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), x);
|
||||
x = psub(x, p16f_1);
|
||||
e = psub(e, _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), p16f_1));
|
||||
x = padd(x, tmp);
|
||||
|
||||
Packet16f x2 = pmul(x, x);
|
||||
Packet16f x3 = pmul(x2, x);
|
||||
|
||||
// Evaluate the polynomial approximant of degree 8 in three parts, probably
|
||||
// to improve instruction-level parallelism.
|
||||
Packet16f y, y1, y2;
|
||||
y = pmadd(p16f_cephes_log_p0, x, p16f_cephes_log_p1);
|
||||
y1 = pmadd(p16f_cephes_log_p3, x, p16f_cephes_log_p4);
|
||||
y2 = pmadd(p16f_cephes_log_p6, x, p16f_cephes_log_p7);
|
||||
y = pmadd(y, x, p16f_cephes_log_p2);
|
||||
y1 = pmadd(y1, x, p16f_cephes_log_p5);
|
||||
y2 = pmadd(y2, x, p16f_cephes_log_p8);
|
||||
y = pmadd(y, x3, y1);
|
||||
y = pmadd(y, x3, y2);
|
||||
y = pmul(y, x3);
|
||||
|
||||
// Add the logarithm of the exponent back to the result of the interpolation.
|
||||
y1 = pmul(e, p16f_cephes_log_q1);
|
||||
tmp = pmul(x2, p16f_half);
|
||||
y = padd(y, y1);
|
||||
x = psub(x, tmp);
|
||||
y2 = pmul(e, p16f_cephes_log_q2);
|
||||
x = padd(x, y);
|
||||
x = padd(x, y2);
|
||||
|
||||
// Filter out invalid inputs, i.e. negative arg will be NAN, 0 will be -INF.
|
||||
return _mm512_mask_blend_ps(iszero_mask,
|
||||
_mm512_mask_blend_ps(invalid_mask, x, p16f_nan),
|
||||
p16f_minus_inf);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Exponential function. Works by writing "x = m*log(2) + r" where
|
||||
// "m = floor(x/log(2)+1/2)" and "r" is the remainder. The result is then
|
||||
// "exp(x) = 2^m*exp(r)" where exp(r) is in the range [-1,1).
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
|
||||
pexp<Packet16f>(const Packet16f& _x) {
|
||||
_EIGEN_DECLARE_CONST_Packet16f(1, 1.0f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(half, 0.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(127, 127.0f);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet16f(exp_hi, 88.3762626647950f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(exp_lo, -88.3762626647949f);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_LOG2EF, 1.44269504088896341f);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p0, 1.9875691500E-4f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p1, 1.3981999507E-3f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p2, 8.3334519073E-3f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p3, 4.1665795894E-2f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p4, 1.6666665459E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p5, 5.0000001201E-1f);
|
||||
|
||||
// Clamp x.
|
||||
Packet16f x = pmax(pmin(_x, p16f_exp_hi), p16f_exp_lo);
|
||||
|
||||
// Express exp(x) as exp(m*ln(2) + r), start by extracting
|
||||
// m = floor(x/ln(2) + 0.5).
|
||||
Packet16f m = _mm512_floor_ps(pmadd(x, p16f_cephes_LOG2EF, p16f_half));
|
||||
|
||||
// Get r = x - m*ln(2). Note that we can do this without losing more than one
|
||||
// ulp precision due to the FMA instruction.
|
||||
_EIGEN_DECLARE_CONST_Packet16f(nln2, -0.6931471805599453f);
|
||||
Packet16f r = _mm512_fmadd_ps(m, p16f_nln2, x);
|
||||
Packet16f r2 = pmul(r, r);
|
||||
|
||||
// TODO(gonnet): Split into odd/even polynomials and try to exploit
|
||||
// instruction-level parallelism.
|
||||
Packet16f y = p16f_cephes_exp_p0;
|
||||
y = pmadd(y, r, p16f_cephes_exp_p1);
|
||||
y = pmadd(y, r, p16f_cephes_exp_p2);
|
||||
y = pmadd(y, r, p16f_cephes_exp_p3);
|
||||
y = pmadd(y, r, p16f_cephes_exp_p4);
|
||||
y = pmadd(y, r, p16f_cephes_exp_p5);
|
||||
y = pmadd(y, r2, r);
|
||||
y = padd(y, p16f_1);
|
||||
|
||||
// Build emm0 = 2^m.
|
||||
Packet16i emm0 = _mm512_cvttps_epi32(padd(m, p16f_127));
|
||||
emm0 = _mm512_slli_epi32(emm0, 23);
|
||||
|
||||
// Return 2^m * exp(r).
|
||||
return pmax(pmul(y, _mm512_castsi512_ps(emm0)), _x);
|
||||
}
|
||||
|
||||
/*template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
|
||||
pexp<Packet8d>(const Packet8d& _x) {
|
||||
Packet8d x = _x;
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet8d(1, 1.0);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(2, 2.0);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet8d(exp_hi, 709.437);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(exp_lo, -709.436139303);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_LOG2EF, 1.4426950408889634073599);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_p0, 1.26177193074810590878e-4);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_p1, 3.02994407707441961300e-2);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_p2, 9.99999999999999999910e-1);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q0, 3.00198505138664455042e-6);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q1, 2.52448340349684104192e-3);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q2, 2.27265548208155028766e-1);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q3, 2.00000000000000000009e0);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_C1, 0.693145751953125);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(cephes_exp_C2, 1.42860682030941723212e-6);
|
||||
|
||||
// clamp x
|
||||
x = pmax(pmin(x, p8d_exp_hi), p8d_exp_lo);
|
||||
|
||||
// Express exp(x) as exp(g + n*log(2)).
|
||||
const Packet8d n =
|
||||
_mm512_mul_round_pd(p8d_cephes_LOG2EF, x, _MM_FROUND_TO_NEAREST_INT);
|
||||
|
||||
// Get the remainder modulo log(2), i.e. the "g" described above. Subtract
|
||||
// n*log(2) out in two steps, i.e. n*C1 + n*C2, C1+C2=log2 to get the last
|
||||
// digits right.
|
||||
const Packet8d nC1 = pmul(n, p8d_cephes_exp_C1);
|
||||
const Packet8d nC2 = pmul(n, p8d_cephes_exp_C2);
|
||||
x = psub(x, nC1);
|
||||
x = psub(x, nC2);
|
||||
|
||||
const Packet8d x2 = pmul(x, x);
|
||||
|
||||
// Evaluate the numerator polynomial of the rational interpolant.
|
||||
Packet8d px = p8d_cephes_exp_p0;
|
||||
px = pmadd(px, x2, p8d_cephes_exp_p1);
|
||||
px = pmadd(px, x2, p8d_cephes_exp_p2);
|
||||
px = pmul(px, x);
|
||||
|
||||
// Evaluate the denominator polynomial of the rational interpolant.
|
||||
Packet8d qx = p8d_cephes_exp_q0;
|
||||
qx = pmadd(qx, x2, p8d_cephes_exp_q1);
|
||||
qx = pmadd(qx, x2, p8d_cephes_exp_q2);
|
||||
qx = pmadd(qx, x2, p8d_cephes_exp_q3);
|
||||
|
||||
// I don't really get this bit, copied from the SSE2 routines, so...
|
||||
// TODO(gonnet): Figure out what is going on here, perhaps find a better
|
||||
// rational interpolant?
|
||||
x = _mm512_div_pd(px, psub(qx, px));
|
||||
x = pmadd(p8d_2, x, p8d_1);
|
||||
|
||||
// Build e=2^n.
|
||||
const Packet8d e = _mm512_castsi512_pd(_mm512_slli_epi64(
|
||||
_mm512_add_epi64(_mm512_cvtpd_epi64(n), _mm512_set1_epi64(1023)), 52));
|
||||
|
||||
// Construct the result 2^n * exp(g) = e * x. The max is used to catch
|
||||
// non-finite values in the input.
|
||||
return pmax(pmul(x, e), _x);
|
||||
}*/
|
||||
|
||||
// Functions for sqrt.
|
||||
// The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step
|
||||
// of Newton's method, at a cost of 1-2 bits of precision as opposed to the
|
||||
// exact solution. The main advantage of this approach is not just speed, but
|
||||
// also the fact that it can be inlined and pipelined with other computations,
|
||||
// further reducing its effective latency.
|
||||
#if EIGEN_FAST_MATH
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
|
||||
psqrt<Packet16f>(const Packet16f& _x) {
|
||||
_EIGEN_DECLARE_CONST_Packet16f(one_point_five, 1.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(minus_half, -0.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(flt_min, 0x00800000);
|
||||
|
||||
Packet16f neg_half = pmul(_x, p16f_minus_half);
|
||||
|
||||
// select only the inverse sqrt of positive normal inputs (denormals are
|
||||
// flushed to zero and cause infs as well).
|
||||
__mmask16 non_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_GE_OQ);
|
||||
Packet16f x = _mm512_mask_blend_ps(non_zero_mask, _mm512_setzero_ps(), _mm512_rsqrt14_ps(_x));
|
||||
|
||||
// Do a single step of Newton's iteration.
|
||||
x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));
|
||||
|
||||
// Multiply the original _x by it's reciprocal square root to extract the
|
||||
// square root.
|
||||
return pmul(_x, x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
|
||||
psqrt<Packet8d>(const Packet8d& _x) {
|
||||
_EIGEN_DECLARE_CONST_Packet8d(one_point_five, 1.5);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(minus_half, -0.5);
|
||||
_EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(dbl_min, 0x0010000000000000LL);
|
||||
|
||||
Packet8d neg_half = pmul(_x, p8d_minus_half);
|
||||
|
||||
// select only the inverse sqrt of positive normal inputs (denormals are
|
||||
// flushed to zero and cause infs as well).
|
||||
__mmask8 non_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_GE_OQ);
|
||||
Packet8d x = _mm512_mask_blend_pd(non_zero_mask, _mm512_setzero_pd(), _mm512_rsqrt14_pd(_x));
|
||||
|
||||
// Do a first step of Newton's iteration.
|
||||
x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
|
||||
|
||||
// Do a second step of Newton's iteration.
|
||||
x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
|
||||
|
||||
// Multiply the original _x by it's reciprocal square root to extract the
|
||||
// square root.
|
||||
return pmul(_x, x);
|
||||
}
|
||||
#else
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16f psqrt<Packet16f>(const Packet16f& x) {
|
||||
return _mm512_sqrt_ps(x);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8d psqrt<Packet8d>(const Packet8d& x) {
|
||||
return _mm512_sqrt_pd(x);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Functions for rsqrt.
|
||||
// Almost identical to the sqrt routine, just leave out the last multiplication
|
||||
// and fill in NaN/Inf where needed. Note that this function only exists as an
|
||||
// iterative version for doubles since there is no instruction for diretly
|
||||
// computing the reciprocal square root in AVX-512.
|
||||
#ifdef EIGEN_FAST_MATH
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
|
||||
prsqrt<Packet16f>(const Packet16f& _x) {
|
||||
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(inf, 0x7f800000);
|
||||
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(nan, 0x7fc00000);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(one_point_five, 1.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f(minus_half, -0.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet16f_FROM_INT(flt_min, 0x00800000);
|
||||
|
||||
Packet16f neg_half = pmul(_x, p16f_minus_half);
|
||||
|
||||
// select only the inverse sqrt of positive normal inputs (denormals are
|
||||
// flushed to zero and cause infs as well).
|
||||
__mmask16 le_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_LT_OQ);
|
||||
Packet16f x = _mm512_mask_blend_ps(le_zero_mask, _mm512_rsqrt14_ps(_x), _mm512_setzero_ps());
|
||||
|
||||
// Fill in NaNs and Infs for the negative/zero entries.
|
||||
__mmask16 neg_mask = _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_LT_OQ);
|
||||
Packet16f infs_and_nans = _mm512_mask_blend_ps(
|
||||
neg_mask, _mm512_mask_blend_ps(le_zero_mask, _mm512_setzero_ps(), p16f_inf), p16f_nan);
|
||||
|
||||
// Do a single step of Newton's iteration.
|
||||
x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));
|
||||
|
||||
// Insert NaNs and Infs in all the right places.
|
||||
return _mm512_mask_blend_ps(le_zero_mask, x, infs_and_nans);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
|
||||
prsqrt<Packet8d>(const Packet8d& _x) {
|
||||
_EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(inf, 0x7ff0000000000000LL);
|
||||
_EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(nan, 0x7ff1000000000000LL);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(one_point_five, 1.5);
|
||||
_EIGEN_DECLARE_CONST_Packet8d(minus_half, -0.5);
|
||||
_EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(dbl_min, 0x0010000000000000LL);
|
||||
|
||||
Packet8d neg_half = pmul(_x, p8d_minus_half);
|
||||
|
||||
// select only the inverse sqrt of positive normal inputs (denormals are
|
||||
// flushed to zero and cause infs as well).
|
||||
__mmask8 le_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_LT_OQ);
|
||||
Packet8d x = _mm512_mask_blend_pd(le_zero_mask, _mm512_rsqrt14_pd(_x), _mm512_setzero_pd());
|
||||
|
||||
// Fill in NaNs and Infs for the negative/zero entries.
|
||||
__mmask8 neg_mask = _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_LT_OQ);
|
||||
Packet8d infs_and_nans = _mm512_mask_blend_pd(
|
||||
neg_mask, _mm512_mask_blend_pd(le_zero_mask, _mm512_setzero_pd(), p8d_inf), p8d_nan);
|
||||
|
||||
// Do a first step of Newton's iteration.
|
||||
x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
|
||||
|
||||
// Do a second step of Newton's iteration.
|
||||
x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
|
||||
|
||||
// Insert NaNs and Infs in all the right places.
|
||||
return _mm512_mask_blend_pd(le_zero_mask, x, infs_and_nans);
|
||||
}
|
||||
#elif defined(EIGEN_VECTORIZE_AVX512ER)
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
|
||||
return _mm512_rsqrt28_ps(x);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
|
||||
1316
external/include/eigen3/Eigen/src/Core/arch/AVX512/PacketMath.h
vendored
Normal file
1316
external/include/eigen3/Eigen/src/Core/arch/AVX512/PacketMath.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
430
external/include/eigen3/Eigen/src/Core/arch/AltiVec/Complex.h
vendored
Normal file
430
external/include/eigen3/Eigen/src/Core/arch/AltiVec/Complex.h
vendored
Normal file
@@ -0,0 +1,430 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2010-2016 Konstantinos Margaritis <markos@freevec.org>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_COMPLEX32_ALTIVEC_H
|
||||
#define EIGEN_COMPLEX32_ALTIVEC_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_MZERO);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
|
||||
#ifdef __VSX__
|
||||
#if defined(_BIG_ENDIAN)
|
||||
static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||
static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_MZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||
#else
|
||||
static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_MZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||
static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//---------- float ----------
|
||||
struct Packet2cf
|
||||
{
|
||||
EIGEN_STRONG_INLINE explicit Packet2cf() : v(p4f_ZERO) {}
|
||||
EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
|
||||
Packet4f v;
|
||||
};
|
||||
|
||||
template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
{
|
||||
typedef Packet2cf type;
|
||||
typedef Packet2cf half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = 2,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasNegate = 1,
|
||||
HasAbs = 0,
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
#ifdef __VSX__
|
||||
HasBlend = 1,
|
||||
#endif
|
||||
HasSetLinear = 0
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
|
||||
{
|
||||
Packet2cf res;
|
||||
if((std::ptrdiff_t(&from) % 16) == 0)
|
||||
res.v = pload<Packet4f>((const float *)&from);
|
||||
else
|
||||
res.v = ploadu<Packet4f>((const float *)&from);
|
||||
res.v = vec_perm(res.v, res.v, p16uc_PSET64_HI);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) { return Packet2cf(pload<Packet4f>((const float *) from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { return Packet2cf(ploadu<Packet4f>((const float*) from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { pstore((float*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { pstoreu((float*)to, from.v); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
|
||||
{
|
||||
std::complex<float> EIGEN_ALIGN16 af[2];
|
||||
af[0] = from[0*stride];
|
||||
af[1] = from[1*stride];
|
||||
return pload<Packet2cf>(af);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
|
||||
{
|
||||
std::complex<float> EIGEN_ALIGN16 af[2];
|
||||
pstore<std::complex<float> >((std::complex<float> *) af, from);
|
||||
to[0*stride] = af[0];
|
||||
to[1*stride] = af[1];
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(a.v + b.v); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(a.v - b.v); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(a.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf(pxor<Packet4f>(a.v, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR))); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
Packet4f v1, v2;
|
||||
|
||||
// Permute and multiply the real parts of a and b
|
||||
v1 = vec_perm(a.v, a.v, p16uc_PSET32_WODD);
|
||||
// Get the imaginary parts of a
|
||||
v2 = vec_perm(a.v, a.v, p16uc_PSET32_WEVEN);
|
||||
// multiply a_re * b
|
||||
v1 = vec_madd(v1, b.v, p4f_ZERO);
|
||||
// multiply a_im * b and get the conjugate result
|
||||
v2 = vec_madd(v2, b.v, p4f_ZERO);
|
||||
v2 = reinterpret_cast<Packet4f>(pxor(v2, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR)));
|
||||
// permute back to a proper order
|
||||
v2 = vec_perm(v2, v2, p16uc_COMPLEX32_REV);
|
||||
|
||||
return Packet2cf(padd<Packet4f>(v1, v2));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand<Packet4f>(a.v, b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por<Packet4f>(a.v, b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pxor<Packet4f>(a.v, b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pandnot<Packet4f>(a.v, b.v)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_PPC_PREFETCH(addr); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
std::complex<float> EIGEN_ALIGN16 res[2];
|
||||
pstore((float *)&res, a.v);
|
||||
|
||||
return res[0];
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
|
||||
{
|
||||
Packet4f rev_a;
|
||||
rev_a = vec_perm(a.v, a.v, p16uc_COMPLEX32_REV2);
|
||||
return Packet2cf(rev_a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
Packet4f b;
|
||||
b = vec_sld(a.v, a.v, 8);
|
||||
b = padd<Packet4f>(a.v, b);
|
||||
return pfirst<Packet2cf>(Packet2cf(b));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
|
||||
{
|
||||
Packet4f b1, b2;
|
||||
#ifdef _BIG_ENDIAN
|
||||
b1 = vec_sld(vecs[0].v, vecs[1].v, 8);
|
||||
b2 = vec_sld(vecs[1].v, vecs[0].v, 8);
|
||||
#else
|
||||
b1 = vec_sld(vecs[1].v, vecs[0].v, 8);
|
||||
b2 = vec_sld(vecs[0].v, vecs[1].v, 8);
|
||||
#endif
|
||||
b2 = vec_sld(b2, b2, 8);
|
||||
b2 = padd<Packet4f>(b1, b2);
|
||||
|
||||
return Packet2cf(b2);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
Packet4f b;
|
||||
Packet2cf prod;
|
||||
b = vec_sld(a.v, a.v, 8);
|
||||
prod = pmul<Packet2cf>(a, Packet2cf(b));
|
||||
|
||||
return pfirst<Packet2cf>(prod);
|
||||
}
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet2cf>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
|
||||
{
|
||||
if (Offset==1)
|
||||
{
|
||||
#ifdef _BIG_ENDIAN
|
||||
first.v = vec_sld(first.v, second.v, 8);
|
||||
#else
|
||||
first.v = vec_sld(second.v, first.v, 8);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
||||
{
|
||||
return internal::pmul(a, pconj(b));
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
||||
{
|
||||
return internal::pmul(pconj(a), b);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
||||
{
|
||||
return pconj(internal::pmul(a, b));
|
||||
}
|
||||
};
|
||||
|
||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
// TODO optimize it for AltiVec
|
||||
Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a, b);
|
||||
Packet4f s = pmul<Packet4f>(b.v, b.v);
|
||||
return Packet2cf(pdiv(res.v, padd<Packet4f>(s, vec_perm(s, s, p16uc_COMPLEX32_REV))));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x)
|
||||
{
|
||||
return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX32_REV));
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
|
||||
{
|
||||
Packet4f tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
|
||||
kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
|
||||
kernel.packet[0].v = tmp;
|
||||
}
|
||||
|
||||
#ifdef __VSX__
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
|
||||
Packet2cf result;
|
||||
result.v = reinterpret_cast<Packet4f>(pblend<Packet2d>(ifPacket, reinterpret_cast<Packet2d>(thenPacket.v), reinterpret_cast<Packet2d>(elsePacket.v)));
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
|
||||
//---------- double ----------
|
||||
#ifdef __VSX__
|
||||
struct Packet1cd
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd() {}
|
||||
EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
|
||||
Packet2d v;
|
||||
};
|
||||
|
||||
template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
||||
{
|
||||
typedef Packet1cd type;
|
||||
typedef Packet1cd half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 0,
|
||||
size = 1,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasNegate = 1,
|
||||
HasAbs = 0,
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
HasSetLinear = 0
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { return Packet1cd(pload<Packet2d>((const double*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { return Packet1cd(ploadu<Packet2d>((const double*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { pstore((double*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { pstoreu((double*)to, from.v); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
|
||||
{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride)
|
||||
{
|
||||
std::complex<double> EIGEN_ALIGN16 af[2];
|
||||
af[0] = from[0*stride];
|
||||
af[1] = from[1*stride];
|
||||
return pload<Packet1cd>(af);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride)
|
||||
{
|
||||
std::complex<double> EIGEN_ALIGN16 af[2];
|
||||
pstore<std::complex<double> >(af, from);
|
||||
to[0*stride] = af[0];
|
||||
to[1*stride] = af[1];
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd(pxor(a.v, reinterpret_cast<Packet2d>(p2ul_CONJ_XOR2))); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
||||
{
|
||||
Packet2d a_re, a_im, v1, v2;
|
||||
|
||||
// Permute and multiply the real parts of a and b
|
||||
a_re = vec_perm(a.v, a.v, p16uc_PSET64_HI);
|
||||
// Get the imaginary parts of a
|
||||
a_im = vec_perm(a.v, a.v, p16uc_PSET64_LO);
|
||||
// multiply a_re * b
|
||||
v1 = vec_madd(a_re, b.v, p2d_ZERO);
|
||||
// multiply a_im * b and get the conjugate result
|
||||
v2 = vec_madd(a_im, b.v, p2d_ZERO);
|
||||
v2 = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(v2), reinterpret_cast<Packet4ui>(v2), 8));
|
||||
v2 = pxor(v2, reinterpret_cast<Packet2d>(p2ul_CONJ_XOR1));
|
||||
|
||||
return Packet1cd(padd<Packet2d>(v1, v2));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pand(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(por(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pxor(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pandnot(a.v, b.v)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_PPC_PREFETCH(addr); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
|
||||
{
|
||||
std::complex<double> EIGEN_ALIGN16 res[2];
|
||||
pstore<std::complex<double> >(res, a);
|
||||
|
||||
return res[0];
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs) { return vecs[0]; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet1cd>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
|
||||
{
|
||||
// FIXME is it sure we never have to align a Packet1cd?
|
||||
// Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
|
||||
{
|
||||
return internal::pmul(a, pconj(b));
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
|
||||
{
|
||||
return internal::pmul(pconj(a), b);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
|
||||
{
|
||||
return pconj(internal::pmul(a, b));
|
||||
}
|
||||
};
|
||||
|
||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
||||
{
|
||||
// TODO optimize it for AltiVec
|
||||
Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
|
||||
Packet2d s = pmul<Packet2d>(b.v, b.v);
|
||||
return Packet1cd(pdiv(res.v, padd<Packet2d>(s, vec_perm(s, s, p16uc_REVERSE64))));
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
|
||||
{
|
||||
return Packet1cd(preverse(Packet2d(x.v)));
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
|
||||
{
|
||||
Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
|
||||
kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
|
||||
kernel.packet[0].v = tmp;
|
||||
}
|
||||
#endif // __VSX__
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_COMPLEX32_ALTIVEC_H
|
||||
322
external/include/eigen3/Eigen/src/Core/arch/AltiVec/MathFunctions.h
vendored
Normal file
322
external/include/eigen3/Eigen/src/Core/arch/AltiVec/MathFunctions.h
vendored
Normal file
@@ -0,0 +1,322 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2007 Julien Pommier
|
||||
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2016 Konstantinos Margaritis <markos@freevec.org>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
/* The sin, cos, exp, and log functions of this file come from
|
||||
* Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
|
||||
*/
|
||||
|
||||
#ifndef EIGEN_MATH_FUNCTIONS_ALTIVEC_H
|
||||
#define EIGEN_MATH_FUNCTIONS_ALTIVEC_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4i(23, 23);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);
|
||||
|
||||
/* the smallest non denormalized float number */
|
||||
static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000); // -1.f/0.f
|
||||
static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_nan, 0xffffffff);
|
||||
|
||||
/* natural logarithm computed for 4 simultaneous float
|
||||
return NaN for x <= 0
|
||||
*/
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
|
||||
static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
|
||||
|
||||
#ifdef __VSX__
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
|
||||
|
||||
#ifdef __POWER8_VECTOR__
|
||||
static Packet2l p2l_1023 = { 1023, 1023 };
|
||||
static Packet2ul p2ul_52 = { 52, 52 };
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f plog<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
Packet4f x = _x;
|
||||
|
||||
Packet4i emm0;
|
||||
|
||||
/* isvalid_mask is 0 if x < 0 or x is NaN. */
|
||||
Packet4ui isvalid_mask = reinterpret_cast<Packet4ui>(vec_cmpge(x, p4f_ZERO));
|
||||
Packet4ui iszero_mask = reinterpret_cast<Packet4ui>(vec_cmpeq(x, p4f_ZERO));
|
||||
|
||||
x = pmax(x, p4f_min_norm_pos); /* cut off denormalized stuff */
|
||||
emm0 = vec_sr(reinterpret_cast<Packet4i>(x),
|
||||
reinterpret_cast<Packet4ui>(p4i_23));
|
||||
|
||||
/* keep only the fractional part */
|
||||
x = pand(x, p4f_inv_mant_mask);
|
||||
x = por(x, p4f_half);
|
||||
|
||||
emm0 = psub(emm0, p4i_0x7f);
|
||||
Packet4f e = padd(vec_ctf(emm0, 0), p4f_1);
|
||||
|
||||
/* part2:
|
||||
if( x < SQRTHF ) {
|
||||
e -= 1;
|
||||
x = x + x - 1.0;
|
||||
} else { x = x - 1.0; }
|
||||
*/
|
||||
Packet4f mask = reinterpret_cast<Packet4f>(vec_cmplt(x, p4f_cephes_SQRTHF));
|
||||
Packet4f tmp = pand(x, mask);
|
||||
x = psub(x, p4f_1);
|
||||
e = psub(e, pand(p4f_1, mask));
|
||||
x = padd(x, tmp);
|
||||
|
||||
Packet4f x2 = pmul(x,x);
|
||||
Packet4f x3 = pmul(x2,x);
|
||||
|
||||
Packet4f y, y1, y2;
|
||||
y = pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1);
|
||||
y1 = pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4);
|
||||
y2 = pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7);
|
||||
y = pmadd(y , x, p4f_cephes_log_p2);
|
||||
y1 = pmadd(y1, x, p4f_cephes_log_p5);
|
||||
y2 = pmadd(y2, x, p4f_cephes_log_p8);
|
||||
y = pmadd(y, x3, y1);
|
||||
y = pmadd(y, x3, y2);
|
||||
y = pmul(y, x3);
|
||||
|
||||
y1 = pmul(e, p4f_cephes_log_q1);
|
||||
tmp = pmul(x2, p4f_half);
|
||||
y = padd(y, y1);
|
||||
x = psub(x, tmp);
|
||||
y2 = pmul(e, p4f_cephes_log_q2);
|
||||
x = padd(x, y);
|
||||
x = padd(x, y2);
|
||||
// negative arg will be NAN, 0 will be -INF
|
||||
x = vec_sel(x, p4f_minus_inf, iszero_mask);
|
||||
x = vec_sel(p4f_minus_nan, x, isvalid_mask);
|
||||
return x;
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f pexp<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
Packet4f x = _x;
|
||||
|
||||
Packet4f tmp, fx;
|
||||
Packet4i emm0;
|
||||
|
||||
// clamp x
|
||||
x = pmax(pmin(x, p4f_exp_hi), p4f_exp_lo);
|
||||
|
||||
// express exp(x) as exp(g + n*log(2))
|
||||
fx = pmadd(x, p4f_cephes_LOG2EF, p4f_half);
|
||||
|
||||
fx = pfloor(fx);
|
||||
|
||||
tmp = pmul(fx, p4f_cephes_exp_C1);
|
||||
Packet4f z = pmul(fx, p4f_cephes_exp_C2);
|
||||
x = psub(x, tmp);
|
||||
x = psub(x, z);
|
||||
|
||||
z = pmul(x,x);
|
||||
|
||||
Packet4f y = p4f_cephes_exp_p0;
|
||||
y = pmadd(y, x, p4f_cephes_exp_p1);
|
||||
y = pmadd(y, x, p4f_cephes_exp_p2);
|
||||
y = pmadd(y, x, p4f_cephes_exp_p3);
|
||||
y = pmadd(y, x, p4f_cephes_exp_p4);
|
||||
y = pmadd(y, x, p4f_cephes_exp_p5);
|
||||
y = pmadd(y, z, x);
|
||||
y = padd(y, p4f_1);
|
||||
|
||||
// build 2^n
|
||||
emm0 = vec_cts(fx, 0);
|
||||
emm0 = vec_add(emm0, p4i_0x7f);
|
||||
emm0 = vec_sl(emm0, reinterpret_cast<Packet4ui>(p4i_23));
|
||||
|
||||
// Altivec's max & min operators just drop silent NaNs. Check NaNs in
|
||||
// inputs and return them unmodified.
|
||||
Packet4ui isnumber_mask = reinterpret_cast<Packet4ui>(vec_cmpeq(_x, _x));
|
||||
return vec_sel(_x, pmax(pmul(y, reinterpret_cast<Packet4f>(emm0)), _x),
|
||||
isnumber_mask);
|
||||
}
|
||||
|
||||
#ifndef EIGEN_COMP_CLANG
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f prsqrt<Packet4f>(const Packet4f& x)
|
||||
{
|
||||
return vec_rsqrt(x);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __VSX__
|
||||
#ifndef EIGEN_COMP_CLANG
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet2d prsqrt<Packet2d>(const Packet2d& x)
|
||||
{
|
||||
return vec_rsqrt(x);
|
||||
}
|
||||
#endif
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f psqrt<Packet4f>(const Packet4f& x)
|
||||
{
|
||||
return vec_sqrt(x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet2d psqrt<Packet2d>(const Packet2d& x)
|
||||
{
|
||||
return vec_sqrt(x);
|
||||
}
|
||||
|
||||
// VSX support varies between different compilers and even different
|
||||
// versions of the same compiler. For gcc version >= 4.9.3, we can use
|
||||
// vec_cts to efficiently convert Packet2d to Packet2l. Otherwise, use
|
||||
// a slow version that works with older compilers.
|
||||
// Update: apparently vec_cts/vec_ctf intrinsics for 64-bit doubles
|
||||
// are buggy, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70963
|
||||
static inline Packet2l ConvertToPacket2l(const Packet2d& x) {
|
||||
#if EIGEN_GNUC_AT_LEAST(5, 4) || \
|
||||
(EIGEN_GNUC_AT(6, 1) && __GNUC_PATCHLEVEL__ >= 1)
|
||||
return vec_cts(x, 0); // TODO: check clang version.
|
||||
#else
|
||||
double tmp[2];
|
||||
memcpy(tmp, &x, sizeof(tmp));
|
||||
Packet2l l = { static_cast<long long>(tmp[0]),
|
||||
static_cast<long long>(tmp[1]) };
|
||||
return l;
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet2d pexp<Packet2d>(const Packet2d& _x)
|
||||
{
|
||||
Packet2d x = _x;
|
||||
|
||||
Packet2d tmp, fx;
|
||||
Packet2l emm0;
|
||||
|
||||
// clamp x
|
||||
x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);
|
||||
|
||||
/* express exp(x) as exp(g + n*log(2)) */
|
||||
fx = pmadd(x, p2d_cephes_LOG2EF, p2d_half);
|
||||
|
||||
fx = pfloor(fx);
|
||||
|
||||
tmp = pmul(fx, p2d_cephes_exp_C1);
|
||||
Packet2d z = pmul(fx, p2d_cephes_exp_C2);
|
||||
x = psub(x, tmp);
|
||||
x = psub(x, z);
|
||||
|
||||
Packet2d x2 = pmul(x,x);
|
||||
|
||||
Packet2d px = p2d_cephes_exp_p0;
|
||||
px = pmadd(px, x2, p2d_cephes_exp_p1);
|
||||
px = pmadd(px, x2, p2d_cephes_exp_p2);
|
||||
px = pmul (px, x);
|
||||
|
||||
Packet2d qx = p2d_cephes_exp_q0;
|
||||
qx = pmadd(qx, x2, p2d_cephes_exp_q1);
|
||||
qx = pmadd(qx, x2, p2d_cephes_exp_q2);
|
||||
qx = pmadd(qx, x2, p2d_cephes_exp_q3);
|
||||
|
||||
x = pdiv(px,psub(qx,px));
|
||||
x = pmadd(p2d_2,x,p2d_1);
|
||||
|
||||
// build 2^n
|
||||
emm0 = ConvertToPacket2l(fx);
|
||||
|
||||
#ifdef __POWER8_VECTOR__
|
||||
emm0 = vec_add(emm0, p2l_1023);
|
||||
emm0 = vec_sl(emm0, p2ul_52);
|
||||
#else
|
||||
// Code is a bit complex for POWER7. There is actually a
|
||||
// vec_xxsldi intrinsic but it is not supported by some gcc versions.
|
||||
// So we shift (52-32) bits and do a word swap with zeros.
|
||||
_EIGEN_DECLARE_CONST_Packet4i(1023, 1023);
|
||||
_EIGEN_DECLARE_CONST_Packet4i(20, 20); // 52 - 32
|
||||
|
||||
Packet4i emm04i = reinterpret_cast<Packet4i>(emm0);
|
||||
emm04i = vec_add(emm04i, p4i_1023);
|
||||
emm04i = vec_sl(emm04i, reinterpret_cast<Packet4ui>(p4i_20));
|
||||
static const Packet16uc perm = {
|
||||
0x14, 0x15, 0x16, 0x17, 0x00, 0x01, 0x02, 0x03,
|
||||
0x1c, 0x1d, 0x1e, 0x1f, 0x08, 0x09, 0x0a, 0x0b };
|
||||
#ifdef _BIG_ENDIAN
|
||||
emm0 = reinterpret_cast<Packet2l>(vec_perm(p4i_ZERO, emm04i, perm));
|
||||
#else
|
||||
emm0 = reinterpret_cast<Packet2l>(vec_perm(emm04i, p4i_ZERO, perm));
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
// Altivec's max & min operators just drop silent NaNs. Check NaNs in
|
||||
// inputs and return them unmodified.
|
||||
Packet2ul isnumber_mask = reinterpret_cast<Packet2ul>(vec_cmpeq(_x, _x));
|
||||
return vec_sel(_x, pmax(pmul(x, reinterpret_cast<Packet2d>(emm0)), _x),
|
||||
isnumber_mask);
|
||||
}
|
||||
#endif
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_MATH_FUNCTIONS_ALTIVEC_H
|
||||
1061
external/include/eigen3/Eigen/src/Core/arch/AltiVec/PacketMath.h
vendored
Normal file
1061
external/include/eigen3/Eigen/src/Core/arch/AltiVec/PacketMath.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
103
external/include/eigen3/Eigen/src/Core/arch/CUDA/Complex.h
vendored
Normal file
103
external/include/eigen3/Eigen/src/Core/arch/CUDA/Complex.h
vendored
Normal file
@@ -0,0 +1,103 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_COMPLEX_CUDA_H
|
||||
#define EIGEN_COMPLEX_CUDA_H
|
||||
|
||||
// clang-format off
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
|
||||
|
||||
// Many std::complex methods such as operator+, operator-, operator* and
|
||||
// operator/ are not constexpr. Due to this, clang does not treat them as device
|
||||
// functions and thus Eigen functors making use of these operators fail to
|
||||
// compile. Here, we manually specialize these functors for complex types when
|
||||
// building for CUDA to avoid non-constexpr methods.
|
||||
|
||||
// Sum
|
||||
template<typename T> struct scalar_sum_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
|
||||
typedef typename std::complex<T> result_type;
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
|
||||
return std::complex<T>(numext::real(a) + numext::real(b),
|
||||
numext::imag(a) + numext::imag(b));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T> struct scalar_sum_op<std::complex<T>, std::complex<T> > : scalar_sum_op<const std::complex<T>, const std::complex<T> > {};
|
||||
|
||||
|
||||
// Difference
|
||||
template<typename T> struct scalar_difference_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
|
||||
typedef typename std::complex<T> result_type;
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
|
||||
return std::complex<T>(numext::real(a) - numext::real(b),
|
||||
numext::imag(a) - numext::imag(b));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T> struct scalar_difference_op<std::complex<T>, std::complex<T> > : scalar_difference_op<const std::complex<T>, const std::complex<T> > {};
|
||||
|
||||
|
||||
// Product
|
||||
template<typename T> struct scalar_product_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
|
||||
enum {
|
||||
Vectorizable = packet_traits<std::complex<T>>::HasMul
|
||||
};
|
||||
typedef typename std::complex<T> result_type;
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
|
||||
const T a_real = numext::real(a);
|
||||
const T a_imag = numext::imag(a);
|
||||
const T b_real = numext::real(b);
|
||||
const T b_imag = numext::imag(b);
|
||||
return std::complex<T>(a_real * b_real - a_imag * b_imag,
|
||||
a_real * b_imag + a_imag * b_real);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T> struct scalar_product_op<std::complex<T>, std::complex<T> > : scalar_product_op<const std::complex<T>, const std::complex<T> > {};
|
||||
|
||||
|
||||
// Quotient
|
||||
template<typename T> struct scalar_quotient_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
|
||||
enum {
|
||||
Vectorizable = packet_traits<std::complex<T>>::HasDiv
|
||||
};
|
||||
typedef typename std::complex<T> result_type;
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
|
||||
const T a_real = numext::real(a);
|
||||
const T a_imag = numext::imag(a);
|
||||
const T b_real = numext::real(b);
|
||||
const T b_imag = numext::imag(b);
|
||||
const T norm = T(1) / (b_real * b_real + b_imag * b_imag);
|
||||
return std::complex<T>((a_real * b_real + a_imag * b_imag) * norm,
|
||||
(a_imag * b_real - a_real * b_imag) * norm);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T> struct scalar_quotient_op<std::complex<T>, std::complex<T> > : scalar_quotient_op<const std::complex<T>, const std::complex<T> > {};
|
||||
|
||||
#endif
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_COMPLEX_CUDA_H
|
||||
674
external/include/eigen3/Eigen/src/Core/arch/CUDA/Half.h
vendored
Normal file
674
external/include/eigen3/Eigen/src/Core/arch/CUDA/Half.h
vendored
Normal file
@@ -0,0 +1,674 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
//
|
||||
// The conversion routines are Copyright (c) Fabian Giesen, 2016.
|
||||
// The original license follows:
|
||||
//
|
||||
// Copyright (c) Fabian Giesen, 2016
|
||||
// All rights reserved.
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted.
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
// Standard 16-bit float type, mostly useful for GPUs. Defines a new
|
||||
// type Eigen::half (inheriting from CUDA's __half struct) with
|
||||
// operator overloads such that it behaves basically as an arithmetic
|
||||
// type. It will be quite slow on CPUs (so it is recommended to stay
|
||||
// in float32_bits for CPUs, except for simple parameter conversions, I/O
|
||||
// to disk and the likes), but fast on GPUs.
|
||||
|
||||
|
||||
#ifndef EIGEN_HALF_CUDA_H
|
||||
#define EIGEN_HALF_CUDA_H
|
||||
|
||||
#if __cplusplus > 199711L
|
||||
#define EIGEN_EXPLICIT_CAST(tgt_type) explicit operator tgt_type()
|
||||
#else
|
||||
#define EIGEN_EXPLICIT_CAST(tgt_type) operator tgt_type()
|
||||
#endif
|
||||
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
struct half;
|
||||
|
||||
namespace half_impl {
|
||||
|
||||
#if !defined(EIGEN_HAS_CUDA_FP16)
|
||||
// Make our own __half_raw definition that is similar to CUDA's.
|
||||
struct __half_raw {
|
||||
EIGEN_DEVICE_FUNC __half_raw() : x(0) {}
|
||||
explicit EIGEN_DEVICE_FUNC __half_raw(unsigned short raw) : x(raw) {}
|
||||
unsigned short x;
|
||||
};
|
||||
#elif defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000
|
||||
// In CUDA < 9.0, __half is the equivalent of CUDA 9's __half_raw
|
||||
typedef __half __half_raw;
|
||||
#endif
|
||||
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw raw_uint16_to_half(unsigned short x);
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw float_to_half_rtne(float ff);
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half_raw h);
|
||||
|
||||
struct half_base : public __half_raw {
|
||||
EIGEN_DEVICE_FUNC half_base() {}
|
||||
EIGEN_DEVICE_FUNC half_base(const half_base& h) : __half_raw(h) {}
|
||||
EIGEN_DEVICE_FUNC half_base(const __half_raw& h) : __half_raw(h) {}
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER >= 90000
|
||||
EIGEN_DEVICE_FUNC half_base(const __half& h) : __half_raw(*(__half_raw*)&h) {}
|
||||
#endif
|
||||
};
|
||||
|
||||
} // namespace half_impl
|
||||
|
||||
// Class definition.
|
||||
struct half : public half_impl::half_base {
|
||||
#if !defined(EIGEN_HAS_CUDA_FP16) || (defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000)
|
||||
typedef half_impl::__half_raw __half_raw;
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC half() {}
|
||||
|
||||
EIGEN_DEVICE_FUNC half(const __half_raw& h) : half_impl::half_base(h) {}
|
||||
EIGEN_DEVICE_FUNC half(const half& h) : half_impl::half_base(h) {}
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER >= 90000
|
||||
EIGEN_DEVICE_FUNC half(const __half& h) : half_impl::half_base(h) {}
|
||||
#endif
|
||||
|
||||
explicit EIGEN_DEVICE_FUNC half(bool b)
|
||||
: half_impl::half_base(half_impl::raw_uint16_to_half(b ? 0x3c00 : 0)) {}
|
||||
template<class T>
|
||||
explicit EIGEN_DEVICE_FUNC half(const T& val)
|
||||
: half_impl::half_base(half_impl::float_to_half_rtne(static_cast<float>(val))) {}
|
||||
explicit EIGEN_DEVICE_FUNC half(float f)
|
||||
: half_impl::half_base(half_impl::float_to_half_rtne(f)) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(bool) const {
|
||||
// +0.0 and -0.0 become false, everything else becomes true.
|
||||
return (x & 0x7fff) != 0;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(signed char) const {
|
||||
return static_cast<signed char>(half_impl::half_to_float(*this));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned char) const {
|
||||
return static_cast<unsigned char>(half_impl::half_to_float(*this));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(short) const {
|
||||
return static_cast<short>(half_impl::half_to_float(*this));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned short) const {
|
||||
return static_cast<unsigned short>(half_impl::half_to_float(*this));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(int) const {
|
||||
return static_cast<int>(half_impl::half_to_float(*this));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned int) const {
|
||||
return static_cast<unsigned int>(half_impl::half_to_float(*this));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long) const {
|
||||
return static_cast<long>(half_impl::half_to_float(*this));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long) const {
|
||||
return static_cast<unsigned long>(half_impl::half_to_float(*this));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long long) const {
|
||||
return static_cast<long long>(half_impl::half_to_float(*this));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long long) const {
|
||||
return static_cast<unsigned long long>(half_to_float(*this));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(float) const {
|
||||
return half_impl::half_to_float(*this);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(double) const {
|
||||
return static_cast<double>(half_impl::half_to_float(*this));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC half& operator=(const half& other) {
|
||||
x = other.x;
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
namespace std {
|
||||
template<>
|
||||
struct numeric_limits<Eigen::half> {
|
||||
static const bool is_specialized = true;
|
||||
static const bool is_signed = true;
|
||||
static const bool is_integer = false;
|
||||
static const bool is_exact = false;
|
||||
static const bool has_infinity = true;
|
||||
static const bool has_quiet_NaN = true;
|
||||
static const bool has_signaling_NaN = true;
|
||||
static const float_denorm_style has_denorm = denorm_present;
|
||||
static const bool has_denorm_loss = false;
|
||||
static const std::float_round_style round_style = std::round_to_nearest;
|
||||
static const bool is_iec559 = false;
|
||||
static const bool is_bounded = false;
|
||||
static const bool is_modulo = false;
|
||||
static const int digits = 11;
|
||||
static const int digits10 = 3; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
|
||||
static const int max_digits10 = 5; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
|
||||
static const int radix = 2;
|
||||
static const int min_exponent = -13;
|
||||
static const int min_exponent10 = -4;
|
||||
static const int max_exponent = 16;
|
||||
static const int max_exponent10 = 4;
|
||||
static const bool traps = true;
|
||||
static const bool tinyness_before = false;
|
||||
|
||||
static Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x400); }
|
||||
static Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); }
|
||||
static Eigen::half (max)() { return Eigen::half_impl::raw_uint16_to_half(0x7bff); }
|
||||
static Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); }
|
||||
static Eigen::half round_error() { return Eigen::half(0.5); }
|
||||
static Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); }
|
||||
static Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }
|
||||
static Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }
|
||||
static Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); }
|
||||
};
|
||||
|
||||
// If std::numeric_limits<T> is specialized, should also specialize
|
||||
// std::numeric_limits<const T>, std::numeric_limits<volatile T>, and
|
||||
// std::numeric_limits<const volatile T>
|
||||
// https://stackoverflow.com/a/16519653/
|
||||
template<>
|
||||
struct numeric_limits<const Eigen::half> : numeric_limits<Eigen::half> {};
|
||||
template<>
|
||||
struct numeric_limits<volatile Eigen::half> : numeric_limits<Eigen::half> {};
|
||||
template<>
|
||||
struct numeric_limits<const volatile Eigen::half> : numeric_limits<Eigen::half> {};
|
||||
} // end namespace std
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace half_impl {
|
||||
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
|
||||
|
||||
// Intrinsics for native fp16 support. Note that on current hardware,
|
||||
// these are no faster than float32_bits arithmetic (you need to use the half2
|
||||
// versions to get the ALU speed increased), but you do save the
|
||||
// conversion steps back and forth.
|
||||
|
||||
EIGEN_STRONG_INLINE __device__ half operator + (const half& a, const half& b) {
|
||||
return __hadd(a, b);
|
||||
}
|
||||
EIGEN_STRONG_INLINE __device__ half operator * (const half& a, const half& b) {
|
||||
return __hmul(a, b);
|
||||
}
|
||||
EIGEN_STRONG_INLINE __device__ half operator - (const half& a, const half& b) {
|
||||
return __hsub(a, b);
|
||||
}
|
||||
EIGEN_STRONG_INLINE __device__ half operator / (const half& a, const half& b) {
|
||||
float num = __half2float(a);
|
||||
float denom = __half2float(b);
|
||||
return __float2half(num / denom);
|
||||
}
|
||||
EIGEN_STRONG_INLINE __device__ half operator - (const half& a) {
|
||||
return __hneg(a);
|
||||
}
|
||||
EIGEN_STRONG_INLINE __device__ half& operator += (half& a, const half& b) {
|
||||
a = a + b;
|
||||
return a;
|
||||
}
|
||||
EIGEN_STRONG_INLINE __device__ half& operator *= (half& a, const half& b) {
|
||||
a = a * b;
|
||||
return a;
|
||||
}
|
||||
EIGEN_STRONG_INLINE __device__ half& operator -= (half& a, const half& b) {
|
||||
a = a - b;
|
||||
return a;
|
||||
}
|
||||
EIGEN_STRONG_INLINE __device__ half& operator /= (half& a, const half& b) {
|
||||
a = a / b;
|
||||
return a;
|
||||
}
|
||||
EIGEN_STRONG_INLINE __device__ bool operator == (const half& a, const half& b) {
|
||||
return __heq(a, b);
|
||||
}
|
||||
EIGEN_STRONG_INLINE __device__ bool operator != (const half& a, const half& b) {
|
||||
return __hne(a, b);
|
||||
}
|
||||
EIGEN_STRONG_INLINE __device__ bool operator < (const half& a, const half& b) {
|
||||
return __hlt(a, b);
|
||||
}
|
||||
EIGEN_STRONG_INLINE __device__ bool operator <= (const half& a, const half& b) {
|
||||
return __hle(a, b);
|
||||
}
|
||||
EIGEN_STRONG_INLINE __device__ bool operator > (const half& a, const half& b) {
|
||||
return __hgt(a, b);
|
||||
}
|
||||
EIGEN_STRONG_INLINE __device__ bool operator >= (const half& a, const half& b) {
|
||||
return __hge(a, b);
|
||||
}
|
||||
|
||||
#else // Emulate support for half floats
|
||||
|
||||
// Definitions for CPUs and older CUDA, mostly working through conversion
|
||||
// to/from float32_bits.
|
||||
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator + (const half& a, const half& b) {
|
||||
return half(float(a) + float(b));
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator * (const half& a, const half& b) {
|
||||
return half(float(a) * float(b));
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a, const half& b) {
|
||||
return half(float(a) - float(b));
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, const half& b) {
|
||||
return half(float(a) / float(b));
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a) {
|
||||
half result;
|
||||
result.x = a.x ^ 0x8000;
|
||||
return result;
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator += (half& a, const half& b) {
|
||||
a = half(float(a) + float(b));
|
||||
return a;
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator *= (half& a, const half& b) {
|
||||
a = half(float(a) * float(b));
|
||||
return a;
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator -= (half& a, const half& b) {
|
||||
a = half(float(a) - float(b));
|
||||
return a;
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator /= (half& a, const half& b) {
|
||||
a = half(float(a) / float(b));
|
||||
return a;
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const half& a, const half& b) {
|
||||
return numext::equal_strict(float(a),float(b));
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const half& a, const half& b) {
|
||||
return numext::not_equal_strict(float(a), float(b));
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const half& a, const half& b) {
|
||||
return float(a) < float(b);
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator <= (const half& a, const half& b) {
|
||||
return float(a) <= float(b);
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator > (const half& a, const half& b) {
|
||||
return float(a) > float(b);
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator >= (const half& a, const half& b) {
|
||||
return float(a) >= float(b);
|
||||
}
|
||||
|
||||
#endif // Emulate support for half floats
|
||||
|
||||
// Division by an index. Do it in full float precision to avoid accuracy
|
||||
// issues in converting the denominator to half.
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, Index b) {
|
||||
return half(static_cast<float>(a) / static_cast<float>(b));
|
||||
}
|
||||
|
||||
// Conversion routines, including fallbacks for the host or older CUDA.
|
||||
// Note that newer Intel CPUs (Haswell or newer) have vectorized versions of
|
||||
// these in hardware. If we need more performance on older/other CPUs, they are
|
||||
// also possible to vectorize directly.
|
||||
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw raw_uint16_to_half(unsigned short x) {
|
||||
__half_raw h;
|
||||
h.x = x;
|
||||
return h;
|
||||
}
|
||||
|
||||
union float32_bits {
|
||||
unsigned int u;
|
||||
float f;
|
||||
};
|
||||
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw float_to_half_rtne(float ff) {
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
|
||||
__half tmp_ff = __float2half(ff);
|
||||
return *(__half_raw*)&tmp_ff;
|
||||
|
||||
#elif defined(EIGEN_HAS_FP16_C)
|
||||
__half_raw h;
|
||||
h.x = _cvtss_sh(ff, 0);
|
||||
return h;
|
||||
|
||||
#else
|
||||
float32_bits f; f.f = ff;
|
||||
|
||||
const float32_bits f32infty = { 255 << 23 };
|
||||
const float32_bits f16max = { (127 + 16) << 23 };
|
||||
const float32_bits denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 };
|
||||
unsigned int sign_mask = 0x80000000u;
|
||||
__half_raw o;
|
||||
o.x = static_cast<unsigned short>(0x0u);
|
||||
|
||||
unsigned int sign = f.u & sign_mask;
|
||||
f.u ^= sign;
|
||||
|
||||
// NOTE all the integer compares in this function can be safely
|
||||
// compiled into signed compares since all operands are below
|
||||
// 0x80000000. Important if you want fast straight SSE2 code
|
||||
// (since there's no unsigned PCMPGTD).
|
||||
|
||||
if (f.u >= f16max.u) { // result is Inf or NaN (all exponent bits set)
|
||||
o.x = (f.u > f32infty.u) ? 0x7e00 : 0x7c00; // NaN->qNaN and Inf->Inf
|
||||
} else { // (De)normalized number or zero
|
||||
if (f.u < (113 << 23)) { // resulting FP16 is subnormal or zero
|
||||
// use a magic value to align our 10 mantissa bits at the bottom of
|
||||
// the float. as long as FP addition is round-to-nearest-even this
|
||||
// just works.
|
||||
f.f += denorm_magic.f;
|
||||
|
||||
// and one integer subtract of the bias later, we have our final float!
|
||||
o.x = static_cast<unsigned short>(f.u - denorm_magic.u);
|
||||
} else {
|
||||
unsigned int mant_odd = (f.u >> 13) & 1; // resulting mantissa is odd
|
||||
|
||||
// update exponent, rounding bias part 1
|
||||
f.u += ((unsigned int)(15 - 127) << 23) + 0xfff;
|
||||
// rounding bias part 2
|
||||
f.u += mant_odd;
|
||||
// take the bits!
|
||||
o.x = static_cast<unsigned short>(f.u >> 13);
|
||||
}
|
||||
}
|
||||
|
||||
o.x |= static_cast<unsigned short>(sign >> 16);
|
||||
return o;
|
||||
#endif
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half_raw h) {
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
|
||||
return __half2float(h);
|
||||
|
||||
#elif defined(EIGEN_HAS_FP16_C)
|
||||
return _cvtsh_ss(h.x);
|
||||
|
||||
#else
|
||||
const float32_bits magic = { 113 << 23 };
|
||||
const unsigned int shifted_exp = 0x7c00 << 13; // exponent mask after shift
|
||||
float32_bits o;
|
||||
|
||||
o.u = (h.x & 0x7fff) << 13; // exponent/mantissa bits
|
||||
unsigned int exp = shifted_exp & o.u; // just the exponent
|
||||
o.u += (127 - 15) << 23; // exponent adjust
|
||||
|
||||
// handle exponent special cases
|
||||
if (exp == shifted_exp) { // Inf/NaN?
|
||||
o.u += (128 - 16) << 23; // extra exp adjust
|
||||
} else if (exp == 0) { // Zero/Denormal?
|
||||
o.u += 1 << 23; // extra exp adjust
|
||||
o.f -= magic.f; // renormalize
|
||||
}
|
||||
|
||||
o.u |= (h.x & 0x8000) << 16; // sign bit
|
||||
return o.f;
|
||||
#endif
|
||||
}
|
||||
|
||||
// --- standard functions ---
|
||||
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const half& a) {
|
||||
return (a.x & 0x7fff) == 0x7c00;
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const half& a) {
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
|
||||
return __hisnan(a);
|
||||
#else
|
||||
return (a.x & 0x7fff) > 0x7c00;
|
||||
#endif
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isfinite)(const half& a) {
|
||||
return !(isinf EIGEN_NOT_A_MACRO (a)) && !(isnan EIGEN_NOT_A_MACRO (a));
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) {
|
||||
half result;
|
||||
result.x = a.x & 0x7FFF;
|
||||
return result;
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) {
|
||||
#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
|
||||
return half(hexp(a));
|
||||
#else
|
||||
return half(::expf(float(a)));
|
||||
#endif
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) {
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
|
||||
return half(::hlog(a));
|
||||
#else
|
||||
return half(::logf(float(a)));
|
||||
#endif
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log1p(const half& a) {
|
||||
return half(numext::log1p(float(a)));
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) {
|
||||
return half(::log10f(float(a)));
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) {
|
||||
#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
|
||||
return half(hsqrt(a));
|
||||
#else
|
||||
return half(::sqrtf(float(a)));
|
||||
#endif
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(const half& a, const half& b) {
|
||||
return half(::powf(float(a), float(b)));
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sin(const half& a) {
|
||||
return half(::sinf(float(a)));
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half cos(const half& a) {
|
||||
return half(::cosf(float(a)));
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tan(const half& a) {
|
||||
return half(::tanf(float(a)));
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) {
|
||||
return half(::tanhf(float(a)));
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) {
|
||||
#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300
|
||||
return half(hfloor(a));
|
||||
#else
|
||||
return half(::floorf(float(a)));
|
||||
#endif
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) {
|
||||
#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300
|
||||
return half(hceil(a));
|
||||
#else
|
||||
return half(::ceilf(float(a)));
|
||||
#endif
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) {
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
|
||||
return __hlt(b, a) ? b : a;
|
||||
#else
|
||||
const float f1 = static_cast<float>(a);
|
||||
const float f2 = static_cast<float>(b);
|
||||
return f2 < f1 ? b : a;
|
||||
#endif
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (max)(const half& a, const half& b) {
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
|
||||
return __hlt(a, b) ? b : a;
|
||||
#else
|
||||
const float f1 = static_cast<float>(a);
|
||||
const float f2 = static_cast<float>(b);
|
||||
return f1 < f2 ? b : a;
|
||||
#endif
|
||||
}
|
||||
|
||||
EIGEN_ALWAYS_INLINE std::ostream& operator << (std::ostream& os, const half& v) {
|
||||
os << static_cast<float>(v);
|
||||
return os;
|
||||
}
|
||||
|
||||
} // end namespace half_impl
|
||||
|
||||
// import Eigen::half_impl::half into Eigen namespace
|
||||
// using half_impl::half;
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<>
|
||||
struct random_default_impl<half, false, false>
|
||||
{
|
||||
static inline half run(const half& x, const half& y)
|
||||
{
|
||||
return x + (y-x) * half(float(std::rand()) / float(RAND_MAX));
|
||||
}
|
||||
static inline half run()
|
||||
{
|
||||
return run(half(-1.f), half(1.f));
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct is_arithmetic<half> { enum { value = true }; };
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
template<> struct NumTraits<Eigen::half>
|
||||
: GenericNumTraits<Eigen::half>
|
||||
{
|
||||
enum {
|
||||
IsSigned = true,
|
||||
IsInteger = false,
|
||||
IsComplex = false,
|
||||
RequireInitialization = false
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half epsilon() {
|
||||
return half_impl::raw_uint16_to_half(0x0800);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half dummy_precision() { return Eigen::half(1e-2f); }
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half highest() {
|
||||
return half_impl::raw_uint16_to_half(0x7bff);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half lowest() {
|
||||
return half_impl::raw_uint16_to_half(0xfbff);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half infinity() {
|
||||
return half_impl::raw_uint16_to_half(0x7c00);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half quiet_NaN() {
|
||||
return half_impl::raw_uint16_to_half(0x7c01);
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
// C-like standard mathematical functions and trancendentals.
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half fabsh(const Eigen::half& a) {
|
||||
Eigen::half result;
|
||||
result.x = a.x & 0x7FFF;
|
||||
return result;
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) {
|
||||
return Eigen::half(::expf(float(a)));
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) {
|
||||
#if EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
|
||||
return Eigen::half(::hlog(a));
|
||||
#else
|
||||
return Eigen::half(::logf(float(a)));
|
||||
#endif
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrth(const Eigen::half& a) {
|
||||
return Eigen::half(::sqrtf(float(a)));
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half powh(const Eigen::half& a, const Eigen::half& b) {
|
||||
return Eigen::half(::powf(float(a), float(b)));
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half floorh(const Eigen::half& a) {
|
||||
return Eigen::half(::floorf(float(a)));
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half ceilh(const Eigen::half& a) {
|
||||
return Eigen::half(::ceilf(float(a)));
|
||||
}
|
||||
|
||||
namespace std {
|
||||
|
||||
#if __cplusplus > 199711L
|
||||
template <>
|
||||
struct hash<Eigen::half> {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t operator()(const Eigen::half& a) const {
|
||||
return static_cast<std::size_t>(a.x);
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
} // end namespace std
|
||||
|
||||
|
||||
// Add the missing shfl_xor intrinsic
|
||||
#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
|
||||
__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) {
|
||||
#if EIGEN_CUDACC_VER < 90000
|
||||
return static_cast<Eigen::half>(__shfl_xor(static_cast<float>(var), laneMask, width));
|
||||
#else
|
||||
return static_cast<Eigen::half>(__shfl_xor_sync(0xFFFFFFFF, static_cast<float>(var), laneMask, width));
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
// ldg() has an overload for __half_raw, but we also need one for Eigen::half.
|
||||
#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) {
|
||||
return Eigen::half_impl::raw_uint16_to_half(
|
||||
__ldg(reinterpret_cast<const unsigned short*>(ptr)));
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(EIGEN_CUDA_ARCH)
|
||||
namespace Eigen {
|
||||
namespace numext {
|
||||
|
||||
template<>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
bool (isnan)(const Eigen::half& h) {
|
||||
return (half_impl::isnan)(h);
|
||||
}
|
||||
|
||||
template<>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
bool (isinf)(const Eigen::half& h) {
|
||||
return (half_impl::isinf)(h);
|
||||
}
|
||||
|
||||
template<>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
bool (isfinite)(const Eigen::half& h) {
|
||||
return (half_impl::isfinite)(h);
|
||||
}
|
||||
|
||||
} // namespace Eigen
|
||||
} // namespace numext
|
||||
#endif
|
||||
|
||||
#endif // EIGEN_HALF_CUDA_H
|
||||
91
external/include/eigen3/Eigen/src/Core/arch/CUDA/MathFunctions.h
vendored
Normal file
91
external/include/eigen3/Eigen/src/Core/arch/CUDA/MathFunctions.h
vendored
Normal file
@@ -0,0 +1,91 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_MATH_FUNCTIONS_CUDA_H
|
||||
#define EIGEN_MATH_FUNCTIONS_CUDA_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
// Make sure this is only available when targeting a GPU: we don't want to
|
||||
// introduce conflicts between these packet_traits definitions and the ones
|
||||
// we'll use on the host side (SSE, AVX, ...)
|
||||
#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
float4 plog<float4>(const float4& a)
|
||||
{
|
||||
return make_float4(logf(a.x), logf(a.y), logf(a.z), logf(a.w));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
double2 plog<double2>(const double2& a)
|
||||
{
|
||||
using ::log;
|
||||
return make_double2(log(a.x), log(a.y));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
float4 plog1p<float4>(const float4& a)
|
||||
{
|
||||
return make_float4(log1pf(a.x), log1pf(a.y), log1pf(a.z), log1pf(a.w));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
double2 plog1p<double2>(const double2& a)
|
||||
{
|
||||
return make_double2(log1p(a.x), log1p(a.y));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
float4 pexp<float4>(const float4& a)
|
||||
{
|
||||
return make_float4(expf(a.x), expf(a.y), expf(a.z), expf(a.w));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
double2 pexp<double2>(const double2& a)
|
||||
{
|
||||
using ::exp;
|
||||
return make_double2(exp(a.x), exp(a.y));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
float4 psqrt<float4>(const float4& a)
|
||||
{
|
||||
return make_float4(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z), sqrtf(a.w));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
double2 psqrt<double2>(const double2& a)
|
||||
{
|
||||
using ::sqrt;
|
||||
return make_double2(sqrt(a.x), sqrt(a.y));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
float4 prsqrt<float4>(const float4& a)
|
||||
{
|
||||
return make_float4(rsqrtf(a.x), rsqrtf(a.y), rsqrtf(a.z), rsqrtf(a.w));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
double2 prsqrt<double2>(const double2& a)
|
||||
{
|
||||
return make_double2(rsqrt(a.x), rsqrt(a.y));
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_MATH_FUNCTIONS_CUDA_H
|
||||
333
external/include/eigen3/Eigen/src/Core/arch/CUDA/PacketMath.h
vendored
Normal file
333
external/include/eigen3/Eigen/src/Core/arch/CUDA/PacketMath.h
vendored
Normal file
@@ -0,0 +1,333 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_PACKET_MATH_CUDA_H
|
||||
#define EIGEN_PACKET_MATH_CUDA_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
// Make sure this is only available when targeting a GPU: we don't want to
|
||||
// introduce conflicts between these packet_traits definitions and the ones
|
||||
// we'll use on the host side (SSE, AVX, ...)
|
||||
#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
|
||||
template<> struct is_arithmetic<float4> { enum { value = true }; };
|
||||
template<> struct is_arithmetic<double2> { enum { value = true }; };
|
||||
|
||||
template<> struct packet_traits<float> : default_packet_traits
|
||||
{
|
||||
typedef float4 type;
|
||||
typedef float4 half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=4,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasDiv = 1,
|
||||
HasSin = 0,
|
||||
HasCos = 0,
|
||||
HasLog = 1,
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasLGamma = 1,
|
||||
HasDiGamma = 1,
|
||||
HasZeta = 1,
|
||||
HasPolygamma = 1,
|
||||
HasErf = 1,
|
||||
HasErfc = 1,
|
||||
HasIGamma = 1,
|
||||
HasIGammac = 1,
|
||||
HasBetaInc = 1,
|
||||
|
||||
HasBlend = 0,
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct packet_traits<double> : default_packet_traits
|
||||
{
|
||||
typedef double2 type;
|
||||
typedef double2 half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=2,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasDiv = 1,
|
||||
HasLog = 1,
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasLGamma = 1,
|
||||
HasDiGamma = 1,
|
||||
HasZeta = 1,
|
||||
HasPolygamma = 1,
|
||||
HasErf = 1,
|
||||
HasErfc = 1,
|
||||
HasIGamma = 1,
|
||||
HasIGammac = 1,
|
||||
HasBetaInc = 1,
|
||||
|
||||
HasBlend = 0,
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
template<> struct unpacket_traits<float4> { typedef float type; enum {size=4, alignment=Aligned16}; typedef float4 half; };
|
||||
template<> struct unpacket_traits<double2> { typedef double type; enum {size=2, alignment=Aligned16}; typedef double2 half; };
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pset1<float4>(const float& from) {
|
||||
return make_float4(from, from, from, from);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pset1<double2>(const double& from) {
|
||||
return make_double2(from, from);
|
||||
}
|
||||
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plset<float4>(const float& a) {
|
||||
return make_float4(a, a+1, a+2, a+3);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 plset<double2>(const double& a) {
|
||||
return make_double2(a, a+1);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 padd<float4>(const float4& a, const float4& b) {
|
||||
return make_float4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 padd<double2>(const double2& a, const double2& b) {
|
||||
return make_double2(a.x+b.x, a.y+b.y);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 psub<float4>(const float4& a, const float4& b) {
|
||||
return make_float4(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 psub<double2>(const double2& a, const double2& b) {
|
||||
return make_double2(a.x-b.x, a.y-b.y);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pnegate(const float4& a) {
|
||||
return make_float4(-a.x, -a.y, -a.z, -a.w);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pnegate(const double2& a) {
|
||||
return make_double2(-a.x, -a.y);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pconj(const float4& a) { return a; }
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pconj(const double2& a) { return a; }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmul<float4>(const float4& a, const float4& b) {
|
||||
return make_float4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmul<double2>(const double2& a, const double2& b) {
|
||||
return make_double2(a.x*b.x, a.y*b.y);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pdiv<float4>(const float4& a, const float4& b) {
|
||||
return make_float4(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pdiv<double2>(const double2& a, const double2& b) {
|
||||
return make_double2(a.x/b.x, a.y/b.y);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmin<float4>(const float4& a, const float4& b) {
|
||||
return make_float4(fminf(a.x, b.x), fminf(a.y, b.y), fminf(a.z, b.z), fminf(a.w, b.w));
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmin<double2>(const double2& a, const double2& b) {
|
||||
return make_double2(fmin(a.x, b.x), fmin(a.y, b.y));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmax<float4>(const float4& a, const float4& b) {
|
||||
return make_float4(fmaxf(a.x, b.x), fmaxf(a.y, b.y), fmaxf(a.z, b.z), fmaxf(a.w, b.w));
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmax<double2>(const double2& a, const double2& b) {
|
||||
return make_double2(fmax(a.x, b.x), fmax(a.y, b.y));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pload<float4>(const float* from) {
|
||||
return *reinterpret_cast<const float4*>(from);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pload<double2>(const double* from) {
|
||||
return *reinterpret_cast<const double2*>(from);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 ploadu<float4>(const float* from) {
|
||||
return make_float4(from[0], from[1], from[2], from[3]);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploadu<double2>(const double* from) {
|
||||
return make_double2(from[0], from[1]);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float4 ploaddup<float4>(const float* from) {
|
||||
return make_float4(from[0], from[0], from[1], from[1]);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE double2 ploaddup<double2>(const double* from) {
|
||||
return make_double2(from[0], from[0]);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<float>(float* to, const float4& from) {
|
||||
*reinterpret_cast<float4*>(to) = from;
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<double>(double* to, const double2& from) {
|
||||
*reinterpret_cast<double2*>(to) = from;
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const float4& from) {
|
||||
to[0] = from.x;
|
||||
to[1] = from.y;
|
||||
to[2] = from.z;
|
||||
to[3] = from.w;
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const double2& from) {
|
||||
to[0] = from.x;
|
||||
to[1] = from.y;
|
||||
}
|
||||
|
||||
template<>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Aligned>(const float* from) {
|
||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
|
||||
return __ldg((const float4*)from);
|
||||
#else
|
||||
return make_float4(from[0], from[1], from[2], from[3]);
|
||||
#endif
|
||||
}
|
||||
template<>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Aligned>(const double* from) {
|
||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
|
||||
return __ldg((const double2*)from);
|
||||
#else
|
||||
return make_double2(from[0], from[1]);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Unaligned>(const float* from) {
|
||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
|
||||
return make_float4(__ldg(from+0), __ldg(from+1), __ldg(from+2), __ldg(from+3));
|
||||
#else
|
||||
return make_float4(from[0], from[1], from[2], from[3]);
|
||||
#endif
|
||||
}
|
||||
template<>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Unaligned>(const double* from) {
|
||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
|
||||
return make_double2(__ldg(from+0), __ldg(from+1));
|
||||
#else
|
||||
return make_double2(from[0], from[1]);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, Index stride) {
|
||||
return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline double2 pgather<double, double2>(const double* from, Index stride) {
|
||||
return make_double2(from[0*stride], from[1*stride]);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, float4>(float* to, const float4& from, Index stride) {
|
||||
to[stride*0] = from.x;
|
||||
to[stride*1] = from.y;
|
||||
to[stride*2] = from.z;
|
||||
to[stride*3] = from.w;
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, double2>(double* to, const double2& from, Index stride) {
|
||||
to[stride*0] = from.x;
|
||||
to[stride*1] = from.y;
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline float pfirst<float4>(const float4& a) {
|
||||
return a.x;
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline double pfirst<double2>(const double2& a) {
|
||||
return a.x;
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline float predux<float4>(const float4& a) {
|
||||
return a.x + a.y + a.z + a.w;
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline double predux<double2>(const double2& a) {
|
||||
return a.x + a.y;
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline float predux_max<float4>(const float4& a) {
|
||||
return fmaxf(fmaxf(a.x, a.y), fmaxf(a.z, a.w));
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline double predux_max<double2>(const double2& a) {
|
||||
return fmax(a.x, a.y);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline float predux_min<float4>(const float4& a) {
|
||||
return fminf(fminf(a.x, a.y), fminf(a.z, a.w));
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline double predux_min<double2>(const double2& a) {
|
||||
return fmin(a.x, a.y);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline float predux_mul<float4>(const float4& a) {
|
||||
return a.x * a.y * a.z * a.w;
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline double predux_mul<double2>(const double2& a) {
|
||||
return a.x * a.y;
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline float4 pabs<float4>(const float4& a) {
|
||||
return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {
|
||||
return make_double2(fabs(a.x), fabs(a.y));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<float4,4>& kernel) {
|
||||
float tmp = kernel.packet[0].y;
|
||||
kernel.packet[0].y = kernel.packet[1].x;
|
||||
kernel.packet[1].x = tmp;
|
||||
|
||||
tmp = kernel.packet[0].z;
|
||||
kernel.packet[0].z = kernel.packet[2].x;
|
||||
kernel.packet[2].x = tmp;
|
||||
|
||||
tmp = kernel.packet[0].w;
|
||||
kernel.packet[0].w = kernel.packet[3].x;
|
||||
kernel.packet[3].x = tmp;
|
||||
|
||||
tmp = kernel.packet[1].z;
|
||||
kernel.packet[1].z = kernel.packet[2].y;
|
||||
kernel.packet[2].y = tmp;
|
||||
|
||||
tmp = kernel.packet[1].w;
|
||||
kernel.packet[1].w = kernel.packet[3].y;
|
||||
kernel.packet[3].y = tmp;
|
||||
|
||||
tmp = kernel.packet[2].w;
|
||||
kernel.packet[2].w = kernel.packet[3].z;
|
||||
kernel.packet[3].z = tmp;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<double2,2>& kernel) {
|
||||
double tmp = kernel.packet[0].y;
|
||||
kernel.packet[0].y = kernel.packet[1].x;
|
||||
kernel.packet[1].x = tmp;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
|
||||
#endif // EIGEN_PACKET_MATH_CUDA_H
|
||||
1124
external/include/eigen3/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
vendored
Normal file
1124
external/include/eigen3/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
212
external/include/eigen3/Eigen/src/Core/arch/CUDA/TypeCasting.h
vendored
Normal file
212
external/include/eigen3/Eigen/src/Core/arch/CUDA/TypeCasting.h
vendored
Normal file
@@ -0,0 +1,212 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_TYPE_CASTING_CUDA_H
|
||||
#define EIGEN_TYPE_CASTING_CUDA_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<>
|
||||
struct scalar_cast_op<float, Eigen::half> {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
|
||||
typedef Eigen::half result_type;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const float& a) const {
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
||||
return __float2half(a);
|
||||
#else
|
||||
return Eigen::half(a);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct functor_traits<scalar_cast_op<float, Eigen::half> >
|
||||
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
|
||||
|
||||
|
||||
template<>
|
||||
struct scalar_cast_op<int, Eigen::half> {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
|
||||
typedef Eigen::half result_type;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const int& a) const {
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
||||
return __float2half(static_cast<float>(a));
|
||||
#else
|
||||
return Eigen::half(static_cast<float>(a));
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct functor_traits<scalar_cast_op<int, Eigen::half> >
|
||||
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
|
||||
|
||||
|
||||
template<>
|
||||
struct scalar_cast_op<Eigen::half, float> {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
|
||||
typedef float result_type;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator() (const Eigen::half& a) const {
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
||||
return __half2float(a);
|
||||
#else
|
||||
return static_cast<float>(a);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct functor_traits<scalar_cast_op<Eigen::half, float> >
|
||||
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
|
||||
|
||||
|
||||
|
||||
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<Eigen::half, float> {
|
||||
enum {
|
||||
VectorizedCast = 1,
|
||||
SrcCoeffRatio = 2,
|
||||
TgtCoeffRatio = 1
|
||||
};
|
||||
};
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(const half2& a, const half2& b) {
|
||||
float2 r1 = __half22float2(a);
|
||||
float2 r2 = __half22float2(b);
|
||||
return make_float4(r1.x, r1.y, r2.x, r2.y);
|
||||
}
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<float, Eigen::half> {
|
||||
enum {
|
||||
VectorizedCast = 1,
|
||||
SrcCoeffRatio = 1,
|
||||
TgtCoeffRatio = 2
|
||||
};
|
||||
};
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) {
|
||||
// Simply discard the second half of the input
|
||||
return __floats2half2_rn(a.x, a.y);
|
||||
}
|
||||
|
||||
#elif defined EIGEN_VECTORIZE_AVX512
|
||||
template <>
|
||||
struct type_casting_traits<half, float> {
|
||||
enum {
|
||||
VectorizedCast = 1,
|
||||
SrcCoeffRatio = 1,
|
||||
TgtCoeffRatio = 1
|
||||
};
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet16f pcast<Packet16h, Packet16f>(const Packet16h& a) {
|
||||
return half2float(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<float, half> {
|
||||
enum {
|
||||
VectorizedCast = 1,
|
||||
SrcCoeffRatio = 1,
|
||||
TgtCoeffRatio = 1
|
||||
};
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet16h pcast<Packet16f, Packet16h>(const Packet16f& a) {
|
||||
return float2half(a);
|
||||
}
|
||||
|
||||
#elif defined EIGEN_VECTORIZE_AVX
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<Eigen::half, float> {
|
||||
enum {
|
||||
VectorizedCast = 1,
|
||||
SrcCoeffRatio = 1,
|
||||
TgtCoeffRatio = 1
|
||||
};
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8h, Packet8f>(const Packet8h& a) {
|
||||
return half2float(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<float, Eigen::half> {
|
||||
enum {
|
||||
VectorizedCast = 1,
|
||||
SrcCoeffRatio = 1,
|
||||
TgtCoeffRatio = 1
|
||||
};
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8h pcast<Packet8f, Packet8h>(const Packet8f& a) {
|
||||
return float2half(a);
|
||||
}
|
||||
|
||||
// Disable the following code since it's broken on too many platforms / compilers.
|
||||
//#elif defined(EIGEN_VECTORIZE_SSE) && (!EIGEN_ARCH_x86_64) && (!EIGEN_COMP_MSVC)
|
||||
#elif 0
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<Eigen::half, float> {
|
||||
enum {
|
||||
VectorizedCast = 1,
|
||||
SrcCoeffRatio = 1,
|
||||
TgtCoeffRatio = 1
|
||||
};
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4h, Packet4f>(const Packet4h& a) {
|
||||
__int64_t a64 = _mm_cvtm64_si64(a.x);
|
||||
Eigen::half h = raw_uint16_to_half(static_cast<unsigned short>(a64));
|
||||
float f1 = static_cast<float>(h);
|
||||
h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 16));
|
||||
float f2 = static_cast<float>(h);
|
||||
h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 32));
|
||||
float f3 = static_cast<float>(h);
|
||||
h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 48));
|
||||
float f4 = static_cast<float>(h);
|
||||
return _mm_set_ps(f4, f3, f2, f1);
|
||||
}
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<float, Eigen::half> {
|
||||
enum {
|
||||
VectorizedCast = 1,
|
||||
SrcCoeffRatio = 1,
|
||||
TgtCoeffRatio = 1
|
||||
};
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4h pcast<Packet4f, Packet4h>(const Packet4f& a) {
|
||||
EIGEN_ALIGN16 float aux[4];
|
||||
pstore(aux, a);
|
||||
Eigen::half h0(aux[0]);
|
||||
Eigen::half h1(aux[1]);
|
||||
Eigen::half h2(aux[2]);
|
||||
Eigen::half h3(aux[3]);
|
||||
|
||||
Packet4h result;
|
||||
result.x = _mm_set_pi16(h3.x, h2.x, h1.x, h0.x);
|
||||
return result;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_TYPE_CASTING_CUDA_H
|
||||
29
external/include/eigen3/Eigen/src/Core/arch/Default/ConjHelper.h
vendored
Normal file
29
external/include/eigen3/Eigen/src/Core/arch/Default/ConjHelper.h
vendored
Normal file
@@ -0,0 +1,29 @@
|
||||
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_ARCH_CONJ_HELPER_H
|
||||
#define EIGEN_ARCH_CONJ_HELPER_H
|
||||
|
||||
#define EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(PACKET_CPLX, PACKET_REAL) \
|
||||
template<> struct conj_helper<PACKET_REAL, PACKET_CPLX, false,false> { \
|
||||
EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_REAL& x, const PACKET_CPLX& y, const PACKET_CPLX& c) const \
|
||||
{ return padd(c, pmul(x,y)); } \
|
||||
EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_REAL& x, const PACKET_CPLX& y) const \
|
||||
{ return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x, y.v)); } \
|
||||
}; \
|
||||
\
|
||||
template<> struct conj_helper<PACKET_CPLX, PACKET_REAL, false,false> { \
|
||||
EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_CPLX& x, const PACKET_REAL& y, const PACKET_CPLX& c) const \
|
||||
{ return padd(c, pmul(x,y)); } \
|
||||
EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_CPLX& x, const PACKET_REAL& y) const \
|
||||
{ return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x.v, y)); } \
|
||||
};
|
||||
|
||||
#endif // EIGEN_ARCH_CONJ_HELPER_H
|
||||
49
external/include/eigen3/Eigen/src/Core/arch/Default/Settings.h
vendored
Normal file
49
external/include/eigen3/Eigen/src/Core/arch/Default/Settings.h
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
|
||||
/* All the parameters defined in this file can be specialized in the
|
||||
* architecture specific files, and/or by the user.
|
||||
* More to come... */
|
||||
|
||||
#ifndef EIGEN_DEFAULT_SETTINGS_H
|
||||
#define EIGEN_DEFAULT_SETTINGS_H
|
||||
|
||||
/** Defines the maximal loop size to enable meta unrolling of loops.
|
||||
* Note that the value here is expressed in Eigen's own notion of "number of FLOPS",
|
||||
* it does not correspond to the number of iterations or the number of instructions
|
||||
*/
|
||||
#ifndef EIGEN_UNROLLING_LIMIT
|
||||
#define EIGEN_UNROLLING_LIMIT 100
|
||||
#endif
|
||||
|
||||
/** Defines the threshold between a "small" and a "large" matrix.
|
||||
* This threshold is mainly used to select the proper product implementation.
|
||||
*/
|
||||
#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
|
||||
#endif
|
||||
|
||||
/** Defines the maximal width of the blocks used in the triangular product and solver
|
||||
* for vectors (level 2 blas xTRMV and xTRSV). The default is 8.
|
||||
*/
|
||||
#ifndef EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH
|
||||
#define EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH 8
|
||||
#endif
|
||||
|
||||
|
||||
/** Defines the default number of registers available for that architecture.
|
||||
* Currently it must be 8 or 16. Other values will fail.
|
||||
*/
|
||||
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
|
||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 8
|
||||
#endif
|
||||
|
||||
#endif // EIGEN_DEFAULT_SETTINGS_H
|
||||
490
external/include/eigen3/Eigen/src/Core/arch/NEON/Complex.h
vendored
Normal file
490
external/include/eigen3/Eigen/src/Core/arch/NEON/Complex.h
vendored
Normal file
@@ -0,0 +1,490 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2010 Konstantinos Margaritis <markos@freevec.org>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_COMPLEX_NEON_H
|
||||
#define EIGEN_COMPLEX_NEON_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
inline uint32x4_t p4ui_CONJ_XOR() {
|
||||
// See bug 1325, clang fails to call vld1q_u64.
|
||||
#if EIGEN_COMP_CLANG
|
||||
uint32x4_t ret = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
|
||||
return ret;
|
||||
#else
|
||||
static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
|
||||
return vld1q_u32( conj_XOR_DATA );
|
||||
#endif
|
||||
}
|
||||
|
||||
inline uint32x2_t p2ui_CONJ_XOR() {
|
||||
static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000 };
|
||||
return vld1_u32( conj_XOR_DATA );
|
||||
}
|
||||
|
||||
//---------- float ----------
|
||||
struct Packet2cf
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf() {}
|
||||
EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
|
||||
Packet4f v;
|
||||
};
|
||||
|
||||
template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
{
|
||||
typedef Packet2cf type;
|
||||
typedef Packet2cf half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = 2,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasNegate = 1,
|
||||
HasAbs = 0,
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
HasSetLinear = 0
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
|
||||
{
|
||||
float32x2_t r64;
|
||||
r64 = vld1_f32((const float *)&from);
|
||||
|
||||
return Packet2cf(vcombine_f32(r64, r64));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd<Packet4f>(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub<Packet4f>(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate<Packet4f>(a.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
|
||||
{
|
||||
Packet4ui b = vreinterpretq_u32_f32(a.v);
|
||||
return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR())));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
Packet4f v1, v2;
|
||||
|
||||
// Get the real values of a | a1_re | a1_re | a2_re | a2_re |
|
||||
v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0));
|
||||
// Get the imag values of a | a1_im | a1_im | a2_im | a2_im |
|
||||
v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 1), vdup_lane_f32(vget_high_f32(a.v), 1));
|
||||
// Multiply the real a with b
|
||||
v1 = vmulq_f32(v1, b.v);
|
||||
// Multiply the imag a with b
|
||||
v2 = vmulq_f32(v2, b.v);
|
||||
// Conjugate v2
|
||||
v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR()));
|
||||
// Swap real/imag elements in v2.
|
||||
v2 = vrev64q_f32(v2);
|
||||
// Add and return the result
|
||||
return Packet2cf(vaddq_f32(v1, v2));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
|
||||
{
|
||||
Packet4f res = pset1<Packet4f>(0.f);
|
||||
res = vsetq_lane_f32(std::real(from[0*stride]), res, 0);
|
||||
res = vsetq_lane_f32(std::imag(from[0*stride]), res, 1);
|
||||
res = vsetq_lane_f32(std::real(from[1*stride]), res, 2);
|
||||
res = vsetq_lane_f32(std::imag(from[1*stride]), res, 3);
|
||||
return Packet2cf(res);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
|
||||
{
|
||||
to[stride*0] = std::complex<float>(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1));
|
||||
to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((const float *)addr); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
std::complex<float> EIGEN_ALIGN16 x[2];
|
||||
vst1q_f32((float *)x, a.v);
|
||||
return x[0];
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
|
||||
{
|
||||
float32x2_t a_lo, a_hi;
|
||||
Packet4f a_r128;
|
||||
|
||||
a_lo = vget_low_f32(a.v);
|
||||
a_hi = vget_high_f32(a.v);
|
||||
a_r128 = vcombine_f32(a_hi, a_lo);
|
||||
|
||||
return Packet2cf(a_r128);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
return Packet2cf(vrev64q_f32(a.v));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
float32x2_t a1, a2;
|
||||
std::complex<float> s;
|
||||
|
||||
a1 = vget_low_f32(a.v);
|
||||
a2 = vget_high_f32(a.v);
|
||||
a2 = vadd_f32(a1, a2);
|
||||
vst1_f32((float *)&s, a2);
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
|
||||
{
|
||||
Packet4f sum1, sum2, sum;
|
||||
|
||||
// Add the first two 64-bit float32x2_t of vecs[0]
|
||||
sum1 = vcombine_f32(vget_low_f32(vecs[0].v), vget_low_f32(vecs[1].v));
|
||||
sum2 = vcombine_f32(vget_high_f32(vecs[0].v), vget_high_f32(vecs[1].v));
|
||||
sum = vaddq_f32(sum1, sum2);
|
||||
|
||||
return Packet2cf(sum);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
float32x2_t a1, a2, v1, v2, prod;
|
||||
std::complex<float> s;
|
||||
|
||||
a1 = vget_low_f32(a.v);
|
||||
a2 = vget_high_f32(a.v);
|
||||
// Get the real values of a | a1_re | a1_re | a2_re | a2_re |
|
||||
v1 = vdup_lane_f32(a1, 0);
|
||||
// Get the real values of a | a1_im | a1_im | a2_im | a2_im |
|
||||
v2 = vdup_lane_f32(a1, 1);
|
||||
// Multiply the real a with b
|
||||
v1 = vmul_f32(v1, a2);
|
||||
// Multiply the imag a with b
|
||||
v2 = vmul_f32(v2, a2);
|
||||
// Conjugate v2
|
||||
v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR()));
|
||||
// Swap real/imag elements in v2.
|
||||
v2 = vrev64_f32(v2);
|
||||
// Add v1, v2
|
||||
prod = vadd_f32(v1, v2);
|
||||
|
||||
vst1_f32((float *)&s, prod);
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet2cf>
|
||||
{
|
||||
EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
|
||||
{
|
||||
if (Offset==1)
|
||||
{
|
||||
first.v = vextq_f32(first.v, second.v, 2);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
||||
{
|
||||
return internal::pmul(a, pconj(b));
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
||||
{
|
||||
return internal::pmul(pconj(a), b);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
||||
{
|
||||
return pconj(internal::pmul(a, b));
|
||||
}
|
||||
};
|
||||
|
||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
// TODO optimize it for NEON
|
||||
Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
|
||||
Packet4f s, rev_s;
|
||||
|
||||
// this computes the norm
|
||||
s = vmulq_f32(b.v, b.v);
|
||||
rev_s = vrev64q_f32(s);
|
||||
|
||||
return Packet2cf(pdiv<Packet4f>(res.v, vaddq_f32(s,rev_s)));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet2cf,2>& kernel) {
|
||||
Packet4f tmp = vcombine_f32(vget_high_f32(kernel.packet[0].v), vget_high_f32(kernel.packet[1].v));
|
||||
kernel.packet[0].v = vcombine_f32(vget_low_f32(kernel.packet[0].v), vget_low_f32(kernel.packet[1].v));
|
||||
kernel.packet[1].v = tmp;
|
||||
}
|
||||
|
||||
//---------- double ----------
|
||||
#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
|
||||
|
||||
// See bug 1325, clang fails to call vld1q_u64.
|
||||
#if EIGEN_COMP_CLANG
|
||||
static uint64x2_t p2ul_CONJ_XOR = {0x0, 0x8000000000000000};
|
||||
#else
|
||||
const uint64_t p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 };
|
||||
static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );
|
||||
#endif
|
||||
|
||||
struct Packet1cd
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd() {}
|
||||
EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
|
||||
Packet2d v;
|
||||
};
|
||||
|
||||
template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
||||
{
|
||||
typedef Packet1cd type;
|
||||
typedef Packet1cd half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 0,
|
||||
size = 1,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasNegate = 1,
|
||||
HasAbs = 0,
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
HasSetLinear = 0
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
|
||||
{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(padd<Packet2d>(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(psub<Packet2d>(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate<Packet2d>(a.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR))); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
||||
{
|
||||
Packet2d v1, v2;
|
||||
|
||||
// Get the real values of a
|
||||
v1 = vdupq_lane_f64(vget_low_f64(a.v), 0);
|
||||
// Get the imag values of a
|
||||
v2 = vdupq_lane_f64(vget_high_f64(a.v), 0);
|
||||
// Multiply the real a with b
|
||||
v1 = vmulq_f64(v1, b.v);
|
||||
// Multiply the imag a with b
|
||||
v2 = vmulq_f64(v2, b.v);
|
||||
// Conjugate v2
|
||||
v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR));
|
||||
// Swap real/imag elements in v2.
|
||||
v2 = preverse<Packet2d>(v2);
|
||||
// Add and return the result
|
||||
return Packet1cd(vaddq_f64(v1, v2));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
||||
{
|
||||
return Packet1cd(vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
||||
{
|
||||
return Packet1cd(vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
||||
{
|
||||
return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
||||
{
|
||||
return Packet1cd(vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((const double *)addr); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride)
|
||||
{
|
||||
Packet2d res = pset1<Packet2d>(0.0);
|
||||
res = vsetq_lane_f64(std::real(from[0*stride]), res, 0);
|
||||
res = vsetq_lane_f64(std::imag(from[0*stride]), res, 1);
|
||||
return Packet1cd(res);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride)
|
||||
{
|
||||
to[stride*0] = std::complex<double>(vgetq_lane_f64(from.v, 0), vgetq_lane_f64(from.v, 1));
|
||||
}
|
||||
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
|
||||
{
|
||||
std::complex<double> EIGEN_ALIGN16 res;
|
||||
pstore<std::complex<double> >(&res, a);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs) { return vecs[0]; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet1cd>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
|
||||
{
|
||||
// FIXME is it sure we never have to align a Packet1cd?
|
||||
// Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
|
||||
{
|
||||
return internal::pmul(a, pconj(b));
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
|
||||
{
|
||||
return internal::pmul(pconj(a), b);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
|
||||
{
|
||||
return pconj(internal::pmul(a, b));
|
||||
}
|
||||
};
|
||||
|
||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
||||
{
|
||||
// TODO optimize it for NEON
|
||||
Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
|
||||
Packet2d s = pmul<Packet2d>(b.v, b.v);
|
||||
Packet2d rev_s = preverse<Packet2d>(s);
|
||||
|
||||
return Packet1cd(pdiv(res.v, padd<Packet2d>(s,rev_s)));
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
|
||||
{
|
||||
return Packet1cd(preverse(Packet2d(x.v)));
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
|
||||
{
|
||||
Packet2d tmp = vcombine_f64(vget_high_f64(kernel.packet[0].v), vget_high_f64(kernel.packet[1].v));
|
||||
kernel.packet[0].v = vcombine_f64(vget_low_f64(kernel.packet[0].v), vget_low_f64(kernel.packet[1].v));
|
||||
kernel.packet[1].v = tmp;
|
||||
}
|
||||
#endif // EIGEN_ARCH_ARM64
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_COMPLEX_NEON_H
|
||||
91
external/include/eigen3/Eigen/src/Core/arch/NEON/MathFunctions.h
vendored
Normal file
91
external/include/eigen3/Eigen/src/Core/arch/NEON/MathFunctions.h
vendored
Normal file
@@ -0,0 +1,91 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
/* The sin, cos, exp, and log functions of this file come from
|
||||
* Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
|
||||
*/
|
||||
|
||||
#ifndef EIGEN_MATH_FUNCTIONS_NEON_H
|
||||
#define EIGEN_MATH_FUNCTIONS_NEON_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f pexp<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
Packet4f x = _x;
|
||||
Packet4f tmp, fx;
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
|
||||
|
||||
x = vminq_f32(x, p4f_exp_hi);
|
||||
x = vmaxq_f32(x, p4f_exp_lo);
|
||||
|
||||
/* express exp(x) as exp(g + n*log(2)) */
|
||||
fx = vmlaq_f32(p4f_half, x, p4f_cephes_LOG2EF);
|
||||
|
||||
/* perform a floorf */
|
||||
tmp = vcvtq_f32_s32(vcvtq_s32_f32(fx));
|
||||
|
||||
/* if greater, substract 1 */
|
||||
Packet4ui mask = vcgtq_f32(tmp, fx);
|
||||
mask = vandq_u32(mask, vreinterpretq_u32_f32(p4f_1));
|
||||
|
||||
fx = vsubq_f32(tmp, vreinterpretq_f32_u32(mask));
|
||||
|
||||
tmp = vmulq_f32(fx, p4f_cephes_exp_C1);
|
||||
Packet4f z = vmulq_f32(fx, p4f_cephes_exp_C2);
|
||||
x = vsubq_f32(x, tmp);
|
||||
x = vsubq_f32(x, z);
|
||||
|
||||
Packet4f y = vmulq_f32(p4f_cephes_exp_p0, x);
|
||||
z = vmulq_f32(x, x);
|
||||
y = vaddq_f32(y, p4f_cephes_exp_p1);
|
||||
y = vmulq_f32(y, x);
|
||||
y = vaddq_f32(y, p4f_cephes_exp_p2);
|
||||
y = vmulq_f32(y, x);
|
||||
y = vaddq_f32(y, p4f_cephes_exp_p3);
|
||||
y = vmulq_f32(y, x);
|
||||
y = vaddq_f32(y, p4f_cephes_exp_p4);
|
||||
y = vmulq_f32(y, x);
|
||||
y = vaddq_f32(y, p4f_cephes_exp_p5);
|
||||
|
||||
y = vmulq_f32(y, z);
|
||||
y = vaddq_f32(y, x);
|
||||
y = vaddq_f32(y, p4f_1);
|
||||
|
||||
/* build 2^n */
|
||||
int32x4_t mm;
|
||||
mm = vcvtq_s32_f32(fx);
|
||||
mm = vaddq_s32(mm, p4i_0x7f);
|
||||
mm = vshlq_n_s32(mm, 23);
|
||||
Packet4f pow2n = vreinterpretq_f32_s32(mm);
|
||||
|
||||
y = vmulq_f32(y, pow2n);
|
||||
return y;
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_MATH_FUNCTIONS_NEON_H
|
||||
760
external/include/eigen3/Eigen/src/Core/arch/NEON/PacketMath.h
vendored
Normal file
760
external/include/eigen3/Eigen/src/Core/arch/NEON/PacketMath.h
vendored
Normal file
@@ -0,0 +1,760 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2010 Konstantinos Margaritis <markos@freevec.org>
|
||||
// Heavily based on Gael's SSE version.
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_PACKET_MATH_NEON_H
|
||||
#define EIGEN_PACKET_MATH_NEON_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
|
||||
#if EIGEN_ARCH_ARM64
|
||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
|
||||
#else
|
||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if EIGEN_COMP_MSVC
|
||||
|
||||
// In MSVC's arm_neon.h header file, all NEON vector types
|
||||
// are aliases to the same underlying type __n128.
|
||||
// We thus have to wrap them to make them different C++ types.
|
||||
// (See also bug 1428)
|
||||
|
||||
template<typename T,int unique_id>
|
||||
struct eigen_packet_wrapper
|
||||
{
|
||||
operator T&() { return m_val; }
|
||||
operator const T&() const { return m_val; }
|
||||
eigen_packet_wrapper() {}
|
||||
eigen_packet_wrapper(const T &v) : m_val(v) {}
|
||||
eigen_packet_wrapper& operator=(const T &v) {
|
||||
m_val = v;
|
||||
return *this;
|
||||
}
|
||||
|
||||
T m_val;
|
||||
};
|
||||
typedef eigen_packet_wrapper<float32x2_t,0> Packet2f;
|
||||
typedef eigen_packet_wrapper<float32x4_t,1> Packet4f;
|
||||
typedef eigen_packet_wrapper<int32x4_t ,2> Packet4i;
|
||||
typedef eigen_packet_wrapper<int32x2_t ,3> Packet2i;
|
||||
typedef eigen_packet_wrapper<uint32x4_t ,4> Packet4ui;
|
||||
|
||||
#else
|
||||
|
||||
typedef float32x2_t Packet2f;
|
||||
typedef float32x4_t Packet4f;
|
||||
typedef int32x4_t Packet4i;
|
||||
typedef int32x2_t Packet2i;
|
||||
typedef uint32x4_t Packet4ui;
|
||||
|
||||
#endif // EIGEN_COMP_MSVC
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
|
||||
const Packet4f p4f_##NAME = pset1<Packet4f>(X)
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
|
||||
const Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1<int32_t>(X))
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
|
||||
const Packet4i p4i_##NAME = pset1<Packet4i>(X)
|
||||
|
||||
#if EIGEN_ARCH_ARM64
|
||||
// __builtin_prefetch tends to do nothing on ARM64 compilers because the
|
||||
// prefetch instructions there are too detailed for __builtin_prefetch to map
|
||||
// meaningfully to them.
|
||||
#define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__("prfm pldl1keep, [%[addr]]\n" ::[addr] "r"(ADDR) : );
|
||||
#elif EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
|
||||
#define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR);
|
||||
#elif defined __pld
|
||||
#define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR)
|
||||
#elif EIGEN_ARCH_ARM32
|
||||
#define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ("pld [%[addr]]\n" :: [addr] "r" (ADDR) : );
|
||||
#else
|
||||
// by default no explicit prefetching
|
||||
#define EIGEN_ARM_PREFETCH(ADDR)
|
||||
#endif
|
||||
|
||||
template<> struct packet_traits<float> : default_packet_traits
|
||||
{
|
||||
typedef Packet4f type;
|
||||
typedef Packet4f half; // Packet2f intrinsics not implemented yet
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = 4,
|
||||
HasHalfPacket=0, // Packet2f intrinsics not implemented yet
|
||||
|
||||
HasDiv = 1,
|
||||
// FIXME check the Has*
|
||||
HasSin = 0,
|
||||
HasCos = 0,
|
||||
HasLog = 0,
|
||||
HasExp = 1,
|
||||
HasSqrt = 0
|
||||
};
|
||||
};
|
||||
template<> struct packet_traits<int32_t> : default_packet_traits
|
||||
{
|
||||
typedef Packet4i type;
|
||||
typedef Packet4i half; // Packet2i intrinsics not implemented yet
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=4,
|
||||
HasHalfPacket=0 // Packet2i intrinsics not implemented yet
|
||||
// FIXME check the Has*
|
||||
};
|
||||
};
|
||||
|
||||
#if EIGEN_GNUC_AT_MOST(4,4) && !EIGEN_COMP_LLVM
|
||||
// workaround gcc 4.2, 4.3 and 4.4 compilatin issue
|
||||
EIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x) { return ::vld1q_f32((const float32_t*)x); }
|
||||
EIGEN_STRONG_INLINE float32x2_t vld1_f32 (const float* x) { return ::vld1_f32 ((const float32_t*)x); }
|
||||
EIGEN_STRONG_INLINE float32x2_t vld1_dup_f32 (const float* x) { return ::vld1_dup_f32 ((const float32_t*)x); }
|
||||
EIGEN_STRONG_INLINE void vst1q_f32(float* to, float32x4_t from) { ::vst1q_f32((float32_t*)to,from); }
|
||||
EIGEN_STRONG_INLINE void vst1_f32 (float* to, float32x2_t from) { ::vst1_f32 ((float32_t*)to,from); }
|
||||
#endif
|
||||
|
||||
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
|
||||
template<> struct unpacket_traits<Packet4i> { typedef int32_t type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return vdupq_n_f32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int32_t& from) { return vdupq_n_s32(from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a)
|
||||
{
|
||||
const float f[] = {0, 1, 2, 3};
|
||||
Packet4f countdown = vld1q_f32(f);
|
||||
return vaddq_f32(pset1<Packet4f>(a), countdown);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int32_t& a)
|
||||
{
|
||||
const int32_t i[] = {0, 1, 2, 3};
|
||||
Packet4i countdown = vld1q_s32(i);
|
||||
return vaddq_s32(pset1<Packet4i>(a), countdown);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vaddq_f32(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return vaddq_s32(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return vsubq_f32(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return vsubq_s32(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return vnegq_f32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return vnegq_s32(a); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmulq_f32(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmulq_s32(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
#if EIGEN_ARCH_ARM64
|
||||
return vdivq_f32(a,b);
|
||||
#else
|
||||
Packet4f inv, restep, div;
|
||||
|
||||
// NEON does not offer a divide instruction, we have to do a reciprocal approximation
|
||||
// However NEON in contrast to other SIMD engines (AltiVec/SSE), offers
|
||||
// a reciprocal estimate AND a reciprocal step -which saves a few instructions
|
||||
// vrecpeq_f32() returns an estimate to 1/b, which we will finetune with
|
||||
// Newton-Raphson and vrecpsq_f32()
|
||||
inv = vrecpeq_f32(b);
|
||||
|
||||
// This returns a differential, by which we will have to multiply inv to get a better
|
||||
// approximation of 1/b.
|
||||
restep = vrecpsq_f32(b, inv);
|
||||
inv = vmulq_f32(restep, inv);
|
||||
|
||||
// Finally, multiply a by 1/b and get the wanted result of the division.
|
||||
div = vmulq_f32(a, inv);
|
||||
|
||||
return div;
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
|
||||
{ eigen_assert(false && "packet integer division are not supported by NEON");
|
||||
return pset1<Packet4i>(0);
|
||||
}
|
||||
|
||||
// Clang/ARM wrongly advertises __ARM_FEATURE_FMA even when it's not available,
|
||||
// then implements a slow software scalar fallback calling fmaf()!
|
||||
// Filed LLVM bug:
|
||||
// https://llvm.org/bugs/show_bug.cgi?id=27216
|
||||
#if (defined __ARM_FEATURE_FMA) && !(EIGEN_COMP_CLANG && EIGEN_ARCH_ARM)
|
||||
// See bug 936.
|
||||
// FMA is available on VFPv4 i.e. when compiling with -mfpu=neon-vfpv4.
|
||||
// FMA is a true fused multiply-add i.e. only 1 rounding at the end, no intermediate rounding.
|
||||
// MLA is not fused i.e. does 2 roundings.
|
||||
// In addition to giving better accuracy, FMA also gives better performance here on a Krait (Nexus 4):
|
||||
// MLA: 10 GFlop/s ; FMA: 12 GFlops/s.
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vfmaq_f32(c,a,b); }
|
||||
#else
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) {
|
||||
#if EIGEN_COMP_CLANG && EIGEN_ARCH_ARM
|
||||
// Clang/ARM will replace VMLA by VMUL+VADD at least for some values of -mcpu,
|
||||
// at least -mcpu=cortex-a8 and -mcpu=cortex-a7. Since the former is the default on
|
||||
// -march=armv7-a, that is a very common case.
|
||||
// See e.g. this thread:
|
||||
// http://lists.llvm.org/pipermail/llvm-dev/2013-December/068806.html
|
||||
// Filed LLVM bug:
|
||||
// https://llvm.org/bugs/show_bug.cgi?id=27219
|
||||
Packet4f r = c;
|
||||
asm volatile(
|
||||
"vmla.f32 %q[r], %q[a], %q[b]"
|
||||
: [r] "+w" (r)
|
||||
: [a] "w" (a),
|
||||
[b] "w" (b)
|
||||
: );
|
||||
return r;
|
||||
#else
|
||||
return vmlaq_f32(c,a,b);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
// No FMA instruction for int, so use MLA unconditionally.
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return vmlaq_s32(c,a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vminq_f32(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vminq_s32(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmaxq_f32(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmaxq_s32(a,b); }
|
||||
|
||||
// Logical Operations are not supported for float, so we have to reinterpret casts using NEON intrinsics
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vandq_s32(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vorrq_s32(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return veorq_s32(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
return vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vbicq_s32(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int32_t* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f32(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int32_t* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s32(from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
|
||||
{
|
||||
float32x2_t lo, hi;
|
||||
lo = vld1_dup_f32(from);
|
||||
hi = vld1_dup_f32(from+1);
|
||||
return vcombine_f32(lo, hi);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int32_t* from)
|
||||
{
|
||||
int32x2_t lo, hi;
|
||||
lo = vld1_dup_s32(from);
|
||||
hi = vld1_dup_s32(from+1);
|
||||
return vcombine_s32(lo, hi);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore<float> (float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore<int32_t>(int32_t* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<float> (float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<int32_t>(int32_t* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
|
||||
{
|
||||
Packet4f res = pset1<Packet4f>(0.f);
|
||||
res = vsetq_lane_f32(from[0*stride], res, 0);
|
||||
res = vsetq_lane_f32(from[1*stride], res, 1);
|
||||
res = vsetq_lane_f32(from[2*stride], res, 2);
|
||||
res = vsetq_lane_f32(from[3*stride], res, 3);
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int32_t, Packet4i>(const int32_t* from, Index stride)
|
||||
{
|
||||
Packet4i res = pset1<Packet4i>(0);
|
||||
res = vsetq_lane_s32(from[0*stride], res, 0);
|
||||
res = vsetq_lane_s32(from[1*stride], res, 1);
|
||||
res = vsetq_lane_s32(from[2*stride], res, 2);
|
||||
res = vsetq_lane_s32(from[3*stride], res, 3);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
|
||||
{
|
||||
to[stride*0] = vgetq_lane_f32(from, 0);
|
||||
to[stride*1] = vgetq_lane_f32(from, 1);
|
||||
to[stride*2] = vgetq_lane_f32(from, 2);
|
||||
to[stride*3] = vgetq_lane_f32(from, 3);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<int32_t, Packet4i>(int32_t* to, const Packet4i& from, Index stride)
|
||||
{
|
||||
to[stride*0] = vgetq_lane_s32(from, 0);
|
||||
to[stride*1] = vgetq_lane_s32(from, 1);
|
||||
to[stride*2] = vgetq_lane_s32(from, 2);
|
||||
to[stride*3] = vgetq_lane_s32(from, 3);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<float> (const float* addr) { EIGEN_ARM_PREFETCH(addr); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<int32_t>(const int32_t* addr) { EIGEN_ARM_PREFETCH(addr); }
|
||||
|
||||
// FIXME only store the 2 first elements ?
|
||||
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; }
|
||||
template<> EIGEN_STRONG_INLINE int32_t pfirst<Packet4i>(const Packet4i& a) { int32_t EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) {
|
||||
float32x2_t a_lo, a_hi;
|
||||
Packet4f a_r64;
|
||||
|
||||
a_r64 = vrev64q_f32(a);
|
||||
a_lo = vget_low_f32(a_r64);
|
||||
a_hi = vget_high_f32(a_r64);
|
||||
return vcombine_f32(a_hi, a_lo);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) {
|
||||
int32x2_t a_lo, a_hi;
|
||||
Packet4i a_r64;
|
||||
|
||||
a_r64 = vrev64q_s32(a);
|
||||
a_lo = vget_low_s32(a_r64);
|
||||
a_hi = vget_high_s32(a_r64);
|
||||
return vcombine_s32(a_hi, a_lo);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vabsq_f32(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vabsq_s32(a); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
float32x2_t a_lo, a_hi, sum;
|
||||
|
||||
a_lo = vget_low_f32(a);
|
||||
a_hi = vget_high_f32(a);
|
||||
sum = vpadd_f32(a_lo, a_hi);
|
||||
sum = vpadd_f32(sum, sum);
|
||||
return vget_lane_f32(sum, 0);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
|
||||
{
|
||||
float32x4x2_t vtrn1, vtrn2, res1, res2;
|
||||
Packet4f sum1, sum2, sum;
|
||||
|
||||
// NEON zip performs interleaving of the supplied vectors.
|
||||
// We perform two interleaves in a row to acquire the transposed vector
|
||||
vtrn1 = vzipq_f32(vecs[0], vecs[2]);
|
||||
vtrn2 = vzipq_f32(vecs[1], vecs[3]);
|
||||
res1 = vzipq_f32(vtrn1.val[0], vtrn2.val[0]);
|
||||
res2 = vzipq_f32(vtrn1.val[1], vtrn2.val[1]);
|
||||
|
||||
// Do the addition of the resulting vectors
|
||||
sum1 = vaddq_f32(res1.val[0], res1.val[1]);
|
||||
sum2 = vaddq_f32(res2.val[0], res2.val[1]);
|
||||
sum = vaddq_f32(sum1, sum2);
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE int32_t predux<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
int32x2_t a_lo, a_hi, sum;
|
||||
|
||||
a_lo = vget_low_s32(a);
|
||||
a_hi = vget_high_s32(a);
|
||||
sum = vpadd_s32(a_lo, a_hi);
|
||||
sum = vpadd_s32(sum, sum);
|
||||
return vget_lane_s32(sum, 0);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
|
||||
{
|
||||
int32x4x2_t vtrn1, vtrn2, res1, res2;
|
||||
Packet4i sum1, sum2, sum;
|
||||
|
||||
// NEON zip performs interleaving of the supplied vectors.
|
||||
// We perform two interleaves in a row to acquire the transposed vector
|
||||
vtrn1 = vzipq_s32(vecs[0], vecs[2]);
|
||||
vtrn2 = vzipq_s32(vecs[1], vecs[3]);
|
||||
res1 = vzipq_s32(vtrn1.val[0], vtrn2.val[0]);
|
||||
res2 = vzipq_s32(vtrn1.val[1], vtrn2.val[1]);
|
||||
|
||||
// Do the addition of the resulting vectors
|
||||
sum1 = vaddq_s32(res1.val[0], res1.val[1]);
|
||||
sum2 = vaddq_s32(res2.val[0], res2.val[1]);
|
||||
sum = vaddq_s32(sum1, sum2);
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
// Other reduction functions:
|
||||
// mul
|
||||
template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
float32x2_t a_lo, a_hi, prod;
|
||||
|
||||
// Get a_lo = |a1|a2| and a_hi = |a3|a4|
|
||||
a_lo = vget_low_f32(a);
|
||||
a_hi = vget_high_f32(a);
|
||||
// Get the product of a_lo * a_hi -> |a1*a3|a2*a4|
|
||||
prod = vmul_f32(a_lo, a_hi);
|
||||
// Multiply prod with its swapped value |a2*a4|a1*a3|
|
||||
prod = vmul_f32(prod, vrev64_f32(prod));
|
||||
|
||||
return vget_lane_f32(prod, 0);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE int32_t predux_mul<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
int32x2_t a_lo, a_hi, prod;
|
||||
|
||||
// Get a_lo = |a1|a2| and a_hi = |a3|a4|
|
||||
a_lo = vget_low_s32(a);
|
||||
a_hi = vget_high_s32(a);
|
||||
// Get the product of a_lo * a_hi -> |a1*a3|a2*a4|
|
||||
prod = vmul_s32(a_lo, a_hi);
|
||||
// Multiply prod with its swapped value |a2*a4|a1*a3|
|
||||
prod = vmul_s32(prod, vrev64_s32(prod));
|
||||
|
||||
return vget_lane_s32(prod, 0);
|
||||
}
|
||||
|
||||
// min
|
||||
template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
float32x2_t a_lo, a_hi, min;
|
||||
|
||||
a_lo = vget_low_f32(a);
|
||||
a_hi = vget_high_f32(a);
|
||||
min = vpmin_f32(a_lo, a_hi);
|
||||
min = vpmin_f32(min, min);
|
||||
|
||||
return vget_lane_f32(min, 0);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE int32_t predux_min<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
int32x2_t a_lo, a_hi, min;
|
||||
|
||||
a_lo = vget_low_s32(a);
|
||||
a_hi = vget_high_s32(a);
|
||||
min = vpmin_s32(a_lo, a_hi);
|
||||
min = vpmin_s32(min, min);
|
||||
|
||||
return vget_lane_s32(min, 0);
|
||||
}
|
||||
|
||||
// max
|
||||
template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
float32x2_t a_lo, a_hi, max;
|
||||
|
||||
a_lo = vget_low_f32(a);
|
||||
a_hi = vget_high_f32(a);
|
||||
max = vpmax_f32(a_lo, a_hi);
|
||||
max = vpmax_f32(max, max);
|
||||
|
||||
return vget_lane_f32(max, 0);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE int32_t predux_max<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
int32x2_t a_lo, a_hi, max;
|
||||
|
||||
a_lo = vget_low_s32(a);
|
||||
a_hi = vget_high_s32(a);
|
||||
max = vpmax_s32(a_lo, a_hi);
|
||||
max = vpmax_s32(max, max);
|
||||
|
||||
return vget_lane_s32(max, 0);
|
||||
}
|
||||
|
||||
// this PALIGN_NEON business is to work around a bug in LLVM Clang 3.0 causing incorrect compilation errors,
|
||||
// see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074
|
||||
#define PALIGN_NEON(Offset,Type,Command) \
|
||||
template<>\
|
||||
struct palign_impl<Offset,Type>\
|
||||
{\
|
||||
EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\
|
||||
{\
|
||||
if (Offset!=0)\
|
||||
first = Command(first, second, Offset);\
|
||||
}\
|
||||
};\
|
||||
|
||||
PALIGN_NEON(0,Packet4f,vextq_f32)
|
||||
PALIGN_NEON(1,Packet4f,vextq_f32)
|
||||
PALIGN_NEON(2,Packet4f,vextq_f32)
|
||||
PALIGN_NEON(3,Packet4f,vextq_f32)
|
||||
PALIGN_NEON(0,Packet4i,vextq_s32)
|
||||
PALIGN_NEON(1,Packet4i,vextq_s32)
|
||||
PALIGN_NEON(2,Packet4i,vextq_s32)
|
||||
PALIGN_NEON(3,Packet4i,vextq_s32)
|
||||
|
||||
#undef PALIGN_NEON
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet4f,4>& kernel) {
|
||||
float32x4x2_t tmp1 = vzipq_f32(kernel.packet[0], kernel.packet[1]);
|
||||
float32x4x2_t tmp2 = vzipq_f32(kernel.packet[2], kernel.packet[3]);
|
||||
|
||||
kernel.packet[0] = vcombine_f32(vget_low_f32(tmp1.val[0]), vget_low_f32(tmp2.val[0]));
|
||||
kernel.packet[1] = vcombine_f32(vget_high_f32(tmp1.val[0]), vget_high_f32(tmp2.val[0]));
|
||||
kernel.packet[2] = vcombine_f32(vget_low_f32(tmp1.val[1]), vget_low_f32(tmp2.val[1]));
|
||||
kernel.packet[3] = vcombine_f32(vget_high_f32(tmp1.val[1]), vget_high_f32(tmp2.val[1]));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet4i,4>& kernel) {
|
||||
int32x4x2_t tmp1 = vzipq_s32(kernel.packet[0], kernel.packet[1]);
|
||||
int32x4x2_t tmp2 = vzipq_s32(kernel.packet[2], kernel.packet[3]);
|
||||
kernel.packet[0] = vcombine_s32(vget_low_s32(tmp1.val[0]), vget_low_s32(tmp2.val[0]));
|
||||
kernel.packet[1] = vcombine_s32(vget_high_s32(tmp1.val[0]), vget_high_s32(tmp2.val[0]));
|
||||
kernel.packet[2] = vcombine_s32(vget_low_s32(tmp1.val[1]), vget_low_s32(tmp2.val[1]));
|
||||
kernel.packet[3] = vcombine_s32(vget_high_s32(tmp1.val[1]), vget_high_s32(tmp2.val[1]));
|
||||
}
|
||||
|
||||
//---------- double ----------
|
||||
|
||||
// Clang 3.5 in the iOS toolchain has an ICE triggered by NEON intrisics for double.
|
||||
// Confirmed at least with __apple_build_version__ = 6000054.
|
||||
#ifdef __apple_build_version__
|
||||
// Let's hope that by the time __apple_build_version__ hits the 601* range, the bug will be fixed.
|
||||
// https://gist.github.com/yamaya/2924292 suggests that the 3 first digits are only updated with
|
||||
// major toolchain updates.
|
||||
#define EIGEN_APPLE_DOUBLE_NEON_BUG (__apple_build_version__ < 6010000)
|
||||
#else
|
||||
#define EIGEN_APPLE_DOUBLE_NEON_BUG 0
|
||||
#endif
|
||||
|
||||
#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
|
||||
|
||||
// Bug 907: workaround missing declarations of the following two functions in the ADK
|
||||
// Defining these functions as templates ensures that if these intrinsics are
|
||||
// already defined in arm_neon.h, then our workaround doesn't cause a conflict
|
||||
// and has lower priority in overload resolution.
|
||||
template <typename T>
|
||||
uint64x2_t vreinterpretq_u64_f64(T a)
|
||||
{
|
||||
return (uint64x2_t) a;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
float64x2_t vreinterpretq_f64_u64(T a)
|
||||
{
|
||||
return (float64x2_t) a;
|
||||
}
|
||||
|
||||
typedef float64x2_t Packet2d;
|
||||
typedef float64x1_t Packet1d;
|
||||
|
||||
template<> struct packet_traits<double> : default_packet_traits
|
||||
{
|
||||
typedef Packet2d type;
|
||||
typedef Packet2d half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = 2,
|
||||
HasHalfPacket=0,
|
||||
|
||||
HasDiv = 1,
|
||||
// FIXME check the Has*
|
||||
HasSin = 0,
|
||||
HasCos = 0,
|
||||
HasLog = 0,
|
||||
HasExp = 0,
|
||||
HasSqrt = 0
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return vdupq_n_f64(from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a)
|
||||
{
|
||||
const double countdown_raw[] = {0.0,1.0};
|
||||
const Packet2d countdown = vld1q_f64(countdown_raw);
|
||||
return vaddq_f64(pset1<Packet2d>(a), countdown);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return vaddq_f64(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return vsubq_f64(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return vnegq_f64(a); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return vmulq_f64(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return vdivq_f64(a,b); }
|
||||
|
||||
#ifdef __ARM_FEATURE_FMA
|
||||
// See bug 936. See above comment about FMA for float.
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vfmaq_f64(c,a,b); }
|
||||
#else
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vmlaq_f64(c,a,b); }
|
||||
#endif
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vminq_f64(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vmaxq_f64(a,b); }
|
||||
|
||||
// Logical Operations are not supported for float, so we have to reinterpret casts using NEON intrinsics
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b)
|
||||
{
|
||||
return vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b)
|
||||
{
|
||||
return vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b)
|
||||
{
|
||||
return vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b)
|
||||
{
|
||||
return vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f64(from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f64(from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
|
||||
{
|
||||
return vld1q_dup_f64(from);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f64(to, from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f64(to, from); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
|
||||
{
|
||||
Packet2d res = pset1<Packet2d>(0.0);
|
||||
res = vsetq_lane_f64(from[0*stride], res, 0);
|
||||
res = vsetq_lane_f64(from[1*stride], res, 1);
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
|
||||
{
|
||||
to[stride*0] = vgetq_lane_f64(from, 0);
|
||||
to[stride*1] = vgetq_lane_f64(from, 1);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_ARM_PREFETCH(addr); }
|
||||
|
||||
// FIXME only store the 2 first elements ?
|
||||
template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(a, 0); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { return vcombine_f64(vget_high_f64(a), vget_low_f64(a)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vabsq_f64(a); }
|
||||
|
||||
#if EIGEN_COMP_CLANG && defined(__apple_build_version__)
|
||||
// workaround ICE, see bug 907
|
||||
template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return (vget_low_f64(a) + vget_high_f64(a))[0]; }
|
||||
#else
|
||||
template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return vget_lane_f64(vget_low_f64(a) + vget_high_f64(a), 0); }
|
||||
#endif
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
|
||||
{
|
||||
float64x2_t trn1, trn2;
|
||||
|
||||
// NEON zip performs interleaving of the supplied vectors.
|
||||
// We perform two interleaves in a row to acquire the transposed vector
|
||||
trn1 = vzip1q_f64(vecs[0], vecs[1]);
|
||||
trn2 = vzip2q_f64(vecs[0], vecs[1]);
|
||||
|
||||
// Do the addition of the resulting vectors
|
||||
return vaddq_f64(trn1, trn2);
|
||||
}
|
||||
// Other reduction functions:
|
||||
// mul
|
||||
#if EIGEN_COMP_CLANG && defined(__apple_build_version__)
|
||||
template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a) { return (vget_low_f64(a) * vget_high_f64(a))[0]; }
|
||||
#else
|
||||
template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a) { return vget_lane_f64(vget_low_f64(a) * vget_high_f64(a), 0); }
|
||||
#endif
|
||||
|
||||
// min
|
||||
template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(vpminq_f64(a, a), 0); }
|
||||
|
||||
// max
|
||||
template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(vpmaxq_f64(a, a), 0); }
|
||||
|
||||
// this PALIGN_NEON business is to work around a bug in LLVM Clang 3.0 causing incorrect compilation errors,
|
||||
// see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074
|
||||
#define PALIGN_NEON(Offset,Type,Command) \
|
||||
template<>\
|
||||
struct palign_impl<Offset,Type>\
|
||||
{\
|
||||
EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\
|
||||
{\
|
||||
if (Offset!=0)\
|
||||
first = Command(first, second, Offset);\
|
||||
}\
|
||||
};\
|
||||
|
||||
PALIGN_NEON(0,Packet2d,vextq_f64)
|
||||
PALIGN_NEON(1,Packet2d,vextq_f64)
|
||||
#undef PALIGN_NEON
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet2d,2>& kernel) {
|
||||
float64x2_t trn1 = vzip1q_f64(kernel.packet[0], kernel.packet[1]);
|
||||
float64x2_t trn2 = vzip2q_f64(kernel.packet[0], kernel.packet[1]);
|
||||
|
||||
kernel.packet[0] = trn1;
|
||||
kernel.packet[1] = trn2;
|
||||
}
|
||||
#endif // EIGEN_ARCH_ARM64
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_PACKET_MATH_NEON_H
|
||||
471
external/include/eigen3/Eigen/src/Core/arch/SSE/Complex.h
vendored
Normal file
471
external/include/eigen3/Eigen/src/Core/arch/SSE/Complex.h
vendored
Normal file
@@ -0,0 +1,471 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_COMPLEX_SSE_H
|
||||
#define EIGEN_COMPLEX_SSE_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
//---------- float ----------
|
||||
struct Packet2cf
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf() {}
|
||||
EIGEN_STRONG_INLINE explicit Packet2cf(const __m128& a) : v(a) {}
|
||||
__m128 v;
|
||||
};
|
||||
|
||||
// Use the packet_traits defined in AVX/PacketMath.h instead if we're going
|
||||
// to leverage AVX instructions.
|
||||
#ifndef EIGEN_VECTORIZE_AVX
|
||||
template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
{
|
||||
typedef Packet2cf type;
|
||||
typedef Packet2cf half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = 2,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasNegate = 1,
|
||||
HasAbs = 0,
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
HasSetLinear = 0,
|
||||
HasBlend = 1
|
||||
};
|
||||
};
|
||||
#endif
|
||||
|
||||
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a)
|
||||
{
|
||||
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
|
||||
return Packet2cf(_mm_xor_ps(a.v,mask));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
|
||||
{
|
||||
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
|
||||
return Packet2cf(_mm_xor_ps(a.v,mask));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
return Packet2cf(_mm_addsub_ps(_mm_mul_ps(_mm_moveldup_ps(a.v), b.v),
|
||||
_mm_mul_ps(_mm_movehdup_ps(a.v),
|
||||
vec4f_swizzle1(b.v, 1, 0, 3, 2))));
|
||||
// return Packet2cf(_mm_addsub_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
|
||||
// _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
|
||||
// vec4f_swizzle1(b.v, 1, 0, 3, 2))));
|
||||
#else
|
||||
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x00000000,0x80000000,0x00000000));
|
||||
return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
|
||||
_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
|
||||
vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_and_ps(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_or_ps(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_xor_ps(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_andnot_ps(a.v,b.v)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>(&numext::real_ref(*from))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>(&numext::real_ref(*from))); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
|
||||
{
|
||||
Packet2cf res;
|
||||
#if EIGEN_GNUC_AT_MOST(4,2)
|
||||
// Workaround annoying "may be used uninitialized in this function" warning with gcc 4.2
|
||||
res.v = _mm_loadl_pi(_mm_set1_ps(0.0f), reinterpret_cast<const __m64*>(&from));
|
||||
#elif EIGEN_GNUC_AT_LEAST(4,6)
|
||||
// Suppress annoying "may be used uninitialized in this function" warning with gcc >= 4.6
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wuninitialized"
|
||||
res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
|
||||
#pragma GCC diagnostic pop
|
||||
#else
|
||||
res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
|
||||
#endif
|
||||
return Packet2cf(_mm_movelh_ps(res.v,res.v));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), Packet4f(from.v)); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), Packet4f(from.v)); }
|
||||
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
|
||||
{
|
||||
return Packet2cf(_mm_set_ps(std::imag(from[1*stride]), std::real(from[1*stride]),
|
||||
std::imag(from[0*stride]), std::real(from[0*stride])));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
|
||||
{
|
||||
to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 0)),
|
||||
_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 1)));
|
||||
to[stride*1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 2)),
|
||||
_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
#if EIGEN_GNUC_AT_MOST(4,3)
|
||||
// Workaround gcc 4.2 ICE - this is not performance wise ideal, but who cares...
|
||||
// This workaround also fix invalid code generation with gcc 4.3
|
||||
EIGEN_ALIGN16 std::complex<float> res[2];
|
||||
_mm_store_ps((float*)res, a.v);
|
||||
return res[0];
|
||||
#else
|
||||
std::complex<float> res;
|
||||
_mm_storel_pi((__m64*)&res, a.v);
|
||||
return res;
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) { return Packet2cf(_mm_castpd_ps(preverse(Packet2d(_mm_castps_pd(a.v))))); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
return pfirst(Packet2cf(_mm_add_ps(a.v, _mm_movehl_ps(a.v,a.v))));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
|
||||
{
|
||||
return Packet2cf(_mm_add_ps(_mm_movelh_ps(vecs[0].v,vecs[1].v), _mm_movehl_ps(vecs[1].v,vecs[0].v)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
return pfirst(pmul(a, Packet2cf(_mm_movehl_ps(a.v,a.v))));
|
||||
}
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet2cf>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
|
||||
{
|
||||
if (Offset==1)
|
||||
{
|
||||
first.v = _mm_movehl_ps(first.v, first.v);
|
||||
first.v = _mm_movelh_ps(first.v, second.v);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
return internal::pmul(a, pconj(b));
|
||||
#else
|
||||
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
|
||||
return Packet2cf(_mm_add_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
|
||||
_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
|
||||
vec4f_swizzle1(b.v, 1, 0, 3, 2))));
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
return internal::pmul(pconj(a), b);
|
||||
#else
|
||||
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
|
||||
return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
|
||||
_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
|
||||
vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
return pconj(internal::pmul(a, b));
|
||||
#else
|
||||
const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
|
||||
return Packet2cf(_mm_sub_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
|
||||
_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
|
||||
vec4f_swizzle1(b.v, 1, 0, 3, 2))));
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
// TODO optimize it for SSE3 and 4
|
||||
Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
|
||||
__m128 s = _mm_mul_ps(b.v,b.v);
|
||||
return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(s), 0xb1)))));
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pcplxflip/* <Packet2cf> */(const Packet2cf& x)
|
||||
{
|
||||
return Packet2cf(vec4f_swizzle1(x.v, 1, 0, 3, 2));
|
||||
}
|
||||
|
||||
|
||||
//---------- double ----------
|
||||
struct Packet1cd
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd() {}
|
||||
EIGEN_STRONG_INLINE explicit Packet1cd(const __m128d& a) : v(a) {}
|
||||
__m128d v;
|
||||
};
|
||||
|
||||
// Use the packet_traits defined in AVX/PacketMath.h instead if we're going
|
||||
// to leverage AVX instructions.
|
||||
#ifndef EIGEN_VECTORIZE_AVX
|
||||
template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
||||
{
|
||||
typedef Packet1cd type;
|
||||
typedef Packet1cd half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 0,
|
||||
size = 1,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasNegate = 1,
|
||||
HasAbs = 0,
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
HasSetLinear = 0
|
||||
};
|
||||
};
|
||||
#endif
|
||||
|
||||
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_add_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_sub_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a)
|
||||
{
|
||||
const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
|
||||
return Packet1cd(_mm_xor_pd(a.v,mask));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
return Packet1cd(_mm_addsub_pd(_mm_mul_pd(_mm_movedup_pd(a.v), b.v),
|
||||
_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
|
||||
vec2d_swizzle1(b.v, 1, 0))));
|
||||
#else
|
||||
const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
|
||||
return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
|
||||
_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
|
||||
vec2d_swizzle1(b.v, 1, 0)), mask)));
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_and_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_or_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_xor_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_andnot_pd(a.v,b.v)); }
|
||||
|
||||
// FIXME force unaligned load, this is a temporary fix
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from)
|
||||
{ EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from)
|
||||
{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
|
||||
{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
|
||||
|
||||
// FIXME force unaligned store, this is a temporary fix
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
|
||||
{
|
||||
EIGEN_ALIGN16 double res[2];
|
||||
_mm_store_pd(res, a.v);
|
||||
return std::complex<double>(res[0],res[1]);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
|
||||
{
|
||||
return pfirst(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)
|
||||
{
|
||||
return vecs[0];
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
|
||||
{
|
||||
return pfirst(a);
|
||||
}
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet1cd>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
|
||||
{
|
||||
// FIXME is it sure we never have to align a Packet1cd?
|
||||
// Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
return internal::pmul(a, pconj(b));
|
||||
#else
|
||||
const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
|
||||
return Packet1cd(_mm_add_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask),
|
||||
_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
|
||||
vec2d_swizzle1(b.v, 1, 0))));
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
return internal::pmul(pconj(a), b);
|
||||
#else
|
||||
const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
|
||||
return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
|
||||
_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
|
||||
vec2d_swizzle1(b.v, 1, 0)), mask)));
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
return pconj(internal::pmul(a, b));
|
||||
#else
|
||||
const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
|
||||
return Packet1cd(_mm_sub_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask),
|
||||
_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
|
||||
vec2d_swizzle1(b.v, 1, 0))));
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
||||
{
|
||||
// TODO optimize it for SSE3 and 4
|
||||
Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
|
||||
__m128d s = _mm_mul_pd(b.v,b.v);
|
||||
return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1))));
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pcplxflip/* <Packet1cd> */(const Packet1cd& x)
|
||||
{
|
||||
return Packet1cd(preverse(Packet2d(x.v)));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet2cf,2>& kernel) {
|
||||
__m128d w1 = _mm_castps_pd(kernel.packet[0].v);
|
||||
__m128d w2 = _mm_castps_pd(kernel.packet[1].v);
|
||||
|
||||
__m128 tmp = _mm_castpd_ps(_mm_unpackhi_pd(w1, w2));
|
||||
kernel.packet[0].v = _mm_castpd_ps(_mm_unpacklo_pd(w1, w2));
|
||||
kernel.packet[1].v = tmp;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
|
||||
__m128d result = pblend<Packet2d>(ifPacket, _mm_castps_pd(thenPacket.v), _mm_castps_pd(elsePacket.v));
|
||||
return Packet2cf(_mm_castpd_ps(result));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pinsertfirst(const Packet2cf& a, std::complex<float> b)
|
||||
{
|
||||
return Packet2cf(_mm_loadl_pi(a.v, reinterpret_cast<const __m64*>(&b)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pinsertfirst(const Packet1cd&, std::complex<double> b)
|
||||
{
|
||||
return pset1<Packet1cd>(b);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pinsertlast(const Packet2cf& a, std::complex<float> b)
|
||||
{
|
||||
return Packet2cf(_mm_loadh_pi(a.v, reinterpret_cast<const __m64*>(&b)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pinsertlast(const Packet1cd&, std::complex<double> b)
|
||||
{
|
||||
return pset1<Packet1cd>(b);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_COMPLEX_SSE_H
|
||||
562
external/include/eigen3/Eigen/src/Core/arch/SSE/MathFunctions.h
vendored
Normal file
562
external/include/eigen3/Eigen/src/Core/arch/SSE/MathFunctions.h
vendored
Normal file
@@ -0,0 +1,562 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2007 Julien Pommier
|
||||
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
/* The sin, cos, exp, and log functions of this file come from
|
||||
* Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
|
||||
*/
|
||||
|
||||
#ifndef EIGEN_MATH_FUNCTIONS_SSE_H
|
||||
#define EIGEN_MATH_FUNCTIONS_SSE_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f plog<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
Packet4f x = _x;
|
||||
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);
|
||||
|
||||
/* the smallest non denormalized float number */
|
||||
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000);
|
||||
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000);//-1.f/0.f);
|
||||
|
||||
/* natural logarithm computed for 4 simultaneous float
|
||||
return NaN for x <= 0
|
||||
*/
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
|
||||
|
||||
|
||||
Packet4i emm0;
|
||||
|
||||
Packet4f invalid_mask = _mm_cmpnge_ps(x, _mm_setzero_ps()); // not greater equal is true if x is NaN
|
||||
Packet4f iszero_mask = _mm_cmpeq_ps(x, _mm_setzero_ps());
|
||||
|
||||
x = pmax(x, p4f_min_norm_pos); /* cut off denormalized stuff */
|
||||
emm0 = _mm_srli_epi32(_mm_castps_si128(x), 23);
|
||||
|
||||
/* keep only the fractional part */
|
||||
x = _mm_and_ps(x, p4f_inv_mant_mask);
|
||||
x = _mm_or_ps(x, p4f_half);
|
||||
|
||||
emm0 = _mm_sub_epi32(emm0, p4i_0x7f);
|
||||
Packet4f e = padd(Packet4f(_mm_cvtepi32_ps(emm0)), p4f_1);
|
||||
|
||||
/* part2:
|
||||
if( x < SQRTHF ) {
|
||||
e -= 1;
|
||||
x = x + x - 1.0;
|
||||
} else { x = x - 1.0; }
|
||||
*/
|
||||
Packet4f mask = _mm_cmplt_ps(x, p4f_cephes_SQRTHF);
|
||||
Packet4f tmp = pand(x, mask);
|
||||
x = psub(x, p4f_1);
|
||||
e = psub(e, pand(p4f_1, mask));
|
||||
x = padd(x, tmp);
|
||||
|
||||
Packet4f x2 = pmul(x,x);
|
||||
Packet4f x3 = pmul(x2,x);
|
||||
|
||||
Packet4f y, y1, y2;
|
||||
y = pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1);
|
||||
y1 = pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4);
|
||||
y2 = pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7);
|
||||
y = pmadd(y , x, p4f_cephes_log_p2);
|
||||
y1 = pmadd(y1, x, p4f_cephes_log_p5);
|
||||
y2 = pmadd(y2, x, p4f_cephes_log_p8);
|
||||
y = pmadd(y, x3, y1);
|
||||
y = pmadd(y, x3, y2);
|
||||
y = pmul(y, x3);
|
||||
|
||||
y1 = pmul(e, p4f_cephes_log_q1);
|
||||
tmp = pmul(x2, p4f_half);
|
||||
y = padd(y, y1);
|
||||
x = psub(x, tmp);
|
||||
y2 = pmul(e, p4f_cephes_log_q2);
|
||||
x = padd(x, y);
|
||||
x = padd(x, y2);
|
||||
// negative arg will be NAN, 0 will be -INF
|
||||
return _mm_or_ps(_mm_andnot_ps(iszero_mask, _mm_or_ps(x, invalid_mask)),
|
||||
_mm_and_ps(iszero_mask, p4f_minus_inf));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f pexp<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
Packet4f x = _x;
|
||||
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
|
||||
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
|
||||
|
||||
Packet4f tmp, fx;
|
||||
Packet4i emm0;
|
||||
|
||||
// clamp x
|
||||
x = pmax(pmin(x, p4f_exp_hi), p4f_exp_lo);
|
||||
|
||||
/* express exp(x) as exp(g + n*log(2)) */
|
||||
fx = pmadd(x, p4f_cephes_LOG2EF, p4f_half);
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
fx = _mm_floor_ps(fx);
|
||||
#else
|
||||
emm0 = _mm_cvttps_epi32(fx);
|
||||
tmp = _mm_cvtepi32_ps(emm0);
|
||||
/* if greater, substract 1 */
|
||||
Packet4f mask = _mm_cmpgt_ps(tmp, fx);
|
||||
mask = _mm_and_ps(mask, p4f_1);
|
||||
fx = psub(tmp, mask);
|
||||
#endif
|
||||
|
||||
tmp = pmul(fx, p4f_cephes_exp_C1);
|
||||
Packet4f z = pmul(fx, p4f_cephes_exp_C2);
|
||||
x = psub(x, tmp);
|
||||
x = psub(x, z);
|
||||
|
||||
z = pmul(x,x);
|
||||
|
||||
Packet4f y = p4f_cephes_exp_p0;
|
||||
y = pmadd(y, x, p4f_cephes_exp_p1);
|
||||
y = pmadd(y, x, p4f_cephes_exp_p2);
|
||||
y = pmadd(y, x, p4f_cephes_exp_p3);
|
||||
y = pmadd(y, x, p4f_cephes_exp_p4);
|
||||
y = pmadd(y, x, p4f_cephes_exp_p5);
|
||||
y = pmadd(y, z, x);
|
||||
y = padd(y, p4f_1);
|
||||
|
||||
// build 2^n
|
||||
emm0 = _mm_cvttps_epi32(fx);
|
||||
emm0 = _mm_add_epi32(emm0, p4i_0x7f);
|
||||
emm0 = _mm_slli_epi32(emm0, 23);
|
||||
return pmax(pmul(y, Packet4f(_mm_castsi128_ps(emm0))), _x);
|
||||
}
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet2d pexp<Packet2d>(const Packet2d& _x)
|
||||
{
|
||||
Packet2d x = _x;
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
|
||||
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
|
||||
static const __m128i p4i_1023_0 = _mm_setr_epi32(1023, 1023, 0, 0);
|
||||
|
||||
Packet2d tmp, fx;
|
||||
Packet4i emm0;
|
||||
|
||||
// clamp x
|
||||
x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);
|
||||
/* express exp(x) as exp(g + n*log(2)) */
|
||||
fx = pmadd(p2d_cephes_LOG2EF, x, p2d_half);
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
fx = _mm_floor_pd(fx);
|
||||
#else
|
||||
emm0 = _mm_cvttpd_epi32(fx);
|
||||
tmp = _mm_cvtepi32_pd(emm0);
|
||||
/* if greater, substract 1 */
|
||||
Packet2d mask = _mm_cmpgt_pd(tmp, fx);
|
||||
mask = _mm_and_pd(mask, p2d_1);
|
||||
fx = psub(tmp, mask);
|
||||
#endif
|
||||
|
||||
tmp = pmul(fx, p2d_cephes_exp_C1);
|
||||
Packet2d z = pmul(fx, p2d_cephes_exp_C2);
|
||||
x = psub(x, tmp);
|
||||
x = psub(x, z);
|
||||
|
||||
Packet2d x2 = pmul(x,x);
|
||||
|
||||
Packet2d px = p2d_cephes_exp_p0;
|
||||
px = pmadd(px, x2, p2d_cephes_exp_p1);
|
||||
px = pmadd(px, x2, p2d_cephes_exp_p2);
|
||||
px = pmul (px, x);
|
||||
|
||||
Packet2d qx = p2d_cephes_exp_q0;
|
||||
qx = pmadd(qx, x2, p2d_cephes_exp_q1);
|
||||
qx = pmadd(qx, x2, p2d_cephes_exp_q2);
|
||||
qx = pmadd(qx, x2, p2d_cephes_exp_q3);
|
||||
|
||||
x = pdiv(px,psub(qx,px));
|
||||
x = pmadd(p2d_2,x,p2d_1);
|
||||
|
||||
// build 2^n
|
||||
emm0 = _mm_cvttpd_epi32(fx);
|
||||
emm0 = _mm_add_epi32(emm0, p4i_1023_0);
|
||||
emm0 = _mm_slli_epi32(emm0, 20);
|
||||
emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(1,2,0,3));
|
||||
return pmax(pmul(x, Packet2d(_mm_castsi128_pd(emm0))), _x);
|
||||
}
|
||||
|
||||
/* evaluation of 4 sines at onces, using SSE2 intrinsics.
|
||||
|
||||
The code is the exact rewriting of the cephes sinf function.
|
||||
Precision is excellent as long as x < 8192 (I did not bother to
|
||||
take into account the special handling they have for greater values
|
||||
-- it does not return garbage for arguments over 8192, though, but
|
||||
the extra precision is missing).
|
||||
|
||||
Note that it is such that sinf((float)M_PI) = 8.74e-8, which is the
|
||||
surprising but correct result.
|
||||
*/
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f psin<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
Packet4f x = _x;
|
||||
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4i(1, 1);
|
||||
_EIGEN_DECLARE_CONST_Packet4i(not1, ~1);
|
||||
_EIGEN_DECLARE_CONST_Packet4i(2, 2);
|
||||
_EIGEN_DECLARE_CONST_Packet4i(4, 4);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736E-3f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f); // 4 / M_PI
|
||||
|
||||
Packet4f xmm1, xmm2, xmm3, sign_bit, y;
|
||||
|
||||
Packet4i emm0, emm2;
|
||||
sign_bit = x;
|
||||
/* take the absolute value */
|
||||
x = pabs(x);
|
||||
|
||||
/* take the modulo */
|
||||
|
||||
/* extract the sign bit (upper one) */
|
||||
sign_bit = _mm_and_ps(sign_bit, p4f_sign_mask);
|
||||
|
||||
/* scale by 4/Pi */
|
||||
y = pmul(x, p4f_cephes_FOPI);
|
||||
|
||||
/* store the integer part of y in mm0 */
|
||||
emm2 = _mm_cvttps_epi32(y);
|
||||
/* j=(j+1) & (~1) (see the cephes sources) */
|
||||
emm2 = _mm_add_epi32(emm2, p4i_1);
|
||||
emm2 = _mm_and_si128(emm2, p4i_not1);
|
||||
y = _mm_cvtepi32_ps(emm2);
|
||||
/* get the swap sign flag */
|
||||
emm0 = _mm_and_si128(emm2, p4i_4);
|
||||
emm0 = _mm_slli_epi32(emm0, 29);
|
||||
/* get the polynom selection mask
|
||||
there is one polynom for 0 <= x <= Pi/4
|
||||
and another one for Pi/4<x<=Pi/2
|
||||
|
||||
Both branches will be computed.
|
||||
*/
|
||||
emm2 = _mm_and_si128(emm2, p4i_2);
|
||||
emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
|
||||
|
||||
Packet4f swap_sign_bit = _mm_castsi128_ps(emm0);
|
||||
Packet4f poly_mask = _mm_castsi128_ps(emm2);
|
||||
sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit);
|
||||
|
||||
/* The magic pass: "Extended precision modular arithmetic"
|
||||
x = ((x - y * DP1) - y * DP2) - y * DP3; */
|
||||
xmm1 = pmul(y, p4f_minus_cephes_DP1);
|
||||
xmm2 = pmul(y, p4f_minus_cephes_DP2);
|
||||
xmm3 = pmul(y, p4f_minus_cephes_DP3);
|
||||
x = padd(x, xmm1);
|
||||
x = padd(x, xmm2);
|
||||
x = padd(x, xmm3);
|
||||
|
||||
/* Evaluate the first polynom (0 <= x <= Pi/4) */
|
||||
y = p4f_coscof_p0;
|
||||
Packet4f z = _mm_mul_ps(x,x);
|
||||
|
||||
y = pmadd(y, z, p4f_coscof_p1);
|
||||
y = pmadd(y, z, p4f_coscof_p2);
|
||||
y = pmul(y, z);
|
||||
y = pmul(y, z);
|
||||
Packet4f tmp = pmul(z, p4f_half);
|
||||
y = psub(y, tmp);
|
||||
y = padd(y, p4f_1);
|
||||
|
||||
/* Evaluate the second polynom (Pi/4 <= x <= 0) */
|
||||
|
||||
Packet4f y2 = p4f_sincof_p0;
|
||||
y2 = pmadd(y2, z, p4f_sincof_p1);
|
||||
y2 = pmadd(y2, z, p4f_sincof_p2);
|
||||
y2 = pmul(y2, z);
|
||||
y2 = pmul(y2, x);
|
||||
y2 = padd(y2, x);
|
||||
|
||||
/* select the correct result from the two polynoms */
|
||||
y2 = _mm_and_ps(poly_mask, y2);
|
||||
y = _mm_andnot_ps(poly_mask, y);
|
||||
y = _mm_or_ps(y,y2);
|
||||
/* update the sign */
|
||||
return _mm_xor_ps(y, sign_bit);
|
||||
}
|
||||
|
||||
/* almost the same as psin */
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f pcos<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
Packet4f x = _x;
|
||||
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4i(1, 1);
|
||||
_EIGEN_DECLARE_CONST_Packet4i(not1, ~1);
|
||||
_EIGEN_DECLARE_CONST_Packet4i(2, 2);
|
||||
_EIGEN_DECLARE_CONST_Packet4i(4, 4);
|
||||
|
||||
_EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736E-3f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f); // 4 / M_PI
|
||||
|
||||
Packet4f xmm1, xmm2, xmm3, y;
|
||||
Packet4i emm0, emm2;
|
||||
|
||||
x = pabs(x);
|
||||
|
||||
/* scale by 4/Pi */
|
||||
y = pmul(x, p4f_cephes_FOPI);
|
||||
|
||||
/* get the integer part of y */
|
||||
emm2 = _mm_cvttps_epi32(y);
|
||||
/* j=(j+1) & (~1) (see the cephes sources) */
|
||||
emm2 = _mm_add_epi32(emm2, p4i_1);
|
||||
emm2 = _mm_and_si128(emm2, p4i_not1);
|
||||
y = _mm_cvtepi32_ps(emm2);
|
||||
|
||||
emm2 = _mm_sub_epi32(emm2, p4i_2);
|
||||
|
||||
/* get the swap sign flag */
|
||||
emm0 = _mm_andnot_si128(emm2, p4i_4);
|
||||
emm0 = _mm_slli_epi32(emm0, 29);
|
||||
/* get the polynom selection mask */
|
||||
emm2 = _mm_and_si128(emm2, p4i_2);
|
||||
emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
|
||||
|
||||
Packet4f sign_bit = _mm_castsi128_ps(emm0);
|
||||
Packet4f poly_mask = _mm_castsi128_ps(emm2);
|
||||
|
||||
/* The magic pass: "Extended precision modular arithmetic"
|
||||
x = ((x - y * DP1) - y * DP2) - y * DP3; */
|
||||
xmm1 = pmul(y, p4f_minus_cephes_DP1);
|
||||
xmm2 = pmul(y, p4f_minus_cephes_DP2);
|
||||
xmm3 = pmul(y, p4f_minus_cephes_DP3);
|
||||
x = padd(x, xmm1);
|
||||
x = padd(x, xmm2);
|
||||
x = padd(x, xmm3);
|
||||
|
||||
/* Evaluate the first polynom (0 <= x <= Pi/4) */
|
||||
y = p4f_coscof_p0;
|
||||
Packet4f z = pmul(x,x);
|
||||
|
||||
y = pmadd(y,z,p4f_coscof_p1);
|
||||
y = pmadd(y,z,p4f_coscof_p2);
|
||||
y = pmul(y, z);
|
||||
y = pmul(y, z);
|
||||
Packet4f tmp = _mm_mul_ps(z, p4f_half);
|
||||
y = psub(y, tmp);
|
||||
y = padd(y, p4f_1);
|
||||
|
||||
/* Evaluate the second polynom (Pi/4 <= x <= 0) */
|
||||
Packet4f y2 = p4f_sincof_p0;
|
||||
y2 = pmadd(y2, z, p4f_sincof_p1);
|
||||
y2 = pmadd(y2, z, p4f_sincof_p2);
|
||||
y2 = pmul(y2, z);
|
||||
y2 = pmadd(y2, x, x);
|
||||
|
||||
/* select the correct result from the two polynoms */
|
||||
y2 = _mm_and_ps(poly_mask, y2);
|
||||
y = _mm_andnot_ps(poly_mask, y);
|
||||
y = _mm_or_ps(y,y2);
|
||||
|
||||
/* update the sign */
|
||||
return _mm_xor_ps(y, sign_bit);
|
||||
}
|
||||
|
||||
#if EIGEN_FAST_MATH
|
||||
|
||||
// Functions for sqrt.
|
||||
// The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step
|
||||
// of Newton's method, at a cost of 1-2 bits of precision as opposed to the
|
||||
// exact solution. It does not handle +inf, or denormalized numbers correctly.
|
||||
// The main advantage of this approach is not just speed, but also the fact that
|
||||
// it can be inlined and pipelined with other computations, further reducing its
|
||||
// effective latency. This is similar to Quake3's fast inverse square root.
|
||||
// For detail see here: http://www.beyond3d.com/content/articles/8/
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f psqrt<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
Packet4f half = pmul(_x, pset1<Packet4f>(.5f));
|
||||
Packet4f denormal_mask = _mm_and_ps(
|
||||
_mm_cmpge_ps(_x, _mm_setzero_ps()),
|
||||
_mm_cmplt_ps(_x, pset1<Packet4f>((std::numeric_limits<float>::min)())));
|
||||
|
||||
// Compute approximate reciprocal sqrt.
|
||||
Packet4f x = _mm_rsqrt_ps(_x);
|
||||
// Do a single step of Newton's iteration.
|
||||
x = pmul(x, psub(pset1<Packet4f>(1.5f), pmul(half, pmul(x,x))));
|
||||
// Flush results for denormals to zero.
|
||||
return _mm_andnot_ps(denormal_mask, pmul(_x,x));
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
template<>EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f psqrt<Packet4f>(const Packet4f& x) { return _mm_sqrt_ps(x); }
|
||||
|
||||
#endif
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet2d psqrt<Packet2d>(const Packet2d& x) { return _mm_sqrt_pd(x); }
|
||||
|
||||
#if EIGEN_FAST_MATH
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f prsqrt<Packet4f>(const Packet4f& _x) {
|
||||
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inf, 0x7f800000);
|
||||
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(nan, 0x7fc00000);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(one_point_five, 1.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f(minus_half, -0.5f);
|
||||
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(flt_min, 0x00800000);
|
||||
|
||||
Packet4f neg_half = pmul(_x, p4f_minus_half);
|
||||
|
||||
// select only the inverse sqrt of positive normal inputs (denormals are
|
||||
// flushed to zero and cause infs as well).
|
||||
Packet4f le_zero_mask = _mm_cmple_ps(_x, p4f_flt_min);
|
||||
Packet4f x = _mm_andnot_ps(le_zero_mask, _mm_rsqrt_ps(_x));
|
||||
|
||||
// Fill in NaNs and Infs for the negative/zero entries.
|
||||
Packet4f neg_mask = _mm_cmplt_ps(_x, _mm_setzero_ps());
|
||||
Packet4f zero_mask = _mm_andnot_ps(neg_mask, le_zero_mask);
|
||||
Packet4f infs_and_nans = _mm_or_ps(_mm_and_ps(neg_mask, p4f_nan),
|
||||
_mm_and_ps(zero_mask, p4f_inf));
|
||||
|
||||
// Do a single step of Newton's iteration.
|
||||
x = pmul(x, pmadd(neg_half, pmul(x, x), p4f_one_point_five));
|
||||
|
||||
// Insert NaNs and Infs in all the right places.
|
||||
return _mm_or_ps(x, infs_and_nans);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f prsqrt<Packet4f>(const Packet4f& x) {
|
||||
// Unfortunately we can't use the much faster mm_rqsrt_ps since it only provides an approximation.
|
||||
return _mm_div_ps(pset1<Packet4f>(1.0f), _mm_sqrt_ps(x));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet2d prsqrt<Packet2d>(const Packet2d& x) {
|
||||
// Unfortunately we can't use the much faster mm_rqsrt_pd since it only provides an approximation.
|
||||
return _mm_div_pd(pset1<Packet2d>(1.0), _mm_sqrt_pd(x));
|
||||
}
|
||||
|
||||
// Hyperbolic Tangent function.
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
|
||||
ptanh<Packet4f>(const Packet4f& x) {
|
||||
return internal::generic_fast_tanh_float(x);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
namespace numext {
|
||||
|
||||
template<>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
float sqrt(const float &x)
|
||||
{
|
||||
return internal::pfirst(internal::Packet4f(_mm_sqrt_ss(_mm_set_ss(x))));
|
||||
}
|
||||
|
||||
template<>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
double sqrt(const double &x)
|
||||
{
|
||||
#if EIGEN_COMP_GNUC_STRICT
|
||||
// This works around a GCC bug generating poor code for _mm_sqrt_pd
|
||||
// See https://bitbucket.org/eigen/eigen/commits/14f468dba4d350d7c19c9b93072e19f7b3df563b
|
||||
return internal::pfirst(internal::Packet2d(__builtin_ia32_sqrtsd(_mm_set_sd(x))));
|
||||
#else
|
||||
return internal::pfirst(internal::Packet2d(_mm_sqrt_pd(_mm_set_sd(x))));
|
||||
#endif
|
||||
}
|
||||
|
||||
} // end namespace numex
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_MATH_FUNCTIONS_SSE_H
|
||||
895
external/include/eigen3/Eigen/src/Core/arch/SSE/PacketMath.h
vendored
Normal file
895
external/include/eigen3/Eigen/src/Core/arch/SSE/PacketMath.h
vendored
Normal file
@@ -0,0 +1,895 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_PACKET_MATH_SSE_H
|
||||
#define EIGEN_PACKET_MATH_SSE_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
|
||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
|
||||
#endif
|
||||
|
||||
#ifdef __FMA__
|
||||
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if ((defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW) && (__GXX_ABI_VERSION < 1004)) || EIGEN_OS_QNX
|
||||
// With GCC's default ABI version, a __m128 or __m256 are the same types and therefore we cannot
|
||||
// have overloads for both types without linking error.
|
||||
// One solution is to increase ABI version using -fabi-version=4 (or greater).
|
||||
// Otherwise, we workaround this inconvenience by wrapping 128bit types into the following helper
|
||||
// structure:
|
||||
template<typename T>
|
||||
struct eigen_packet_wrapper
|
||||
{
|
||||
EIGEN_ALWAYS_INLINE operator T&() { return m_val; }
|
||||
EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
|
||||
EIGEN_ALWAYS_INLINE eigen_packet_wrapper() {}
|
||||
EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T &v) : m_val(v) {}
|
||||
EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T &v) {
|
||||
m_val = v;
|
||||
return *this;
|
||||
}
|
||||
|
||||
T m_val;
|
||||
};
|
||||
typedef eigen_packet_wrapper<__m128> Packet4f;
|
||||
typedef eigen_packet_wrapper<__m128i> Packet4i;
|
||||
typedef eigen_packet_wrapper<__m128d> Packet2d;
|
||||
#else
|
||||
typedef __m128 Packet4f;
|
||||
typedef __m128i Packet4i;
|
||||
typedef __m128d Packet2d;
|
||||
#endif
|
||||
|
||||
template<> struct is_arithmetic<__m128> { enum { value = true }; };
|
||||
template<> struct is_arithmetic<__m128i> { enum { value = true }; };
|
||||
template<> struct is_arithmetic<__m128d> { enum { value = true }; };
|
||||
|
||||
#define vec4f_swizzle1(v,p,q,r,s) \
|
||||
(_mm_castsi128_ps(_mm_shuffle_epi32( _mm_castps_si128(v), ((s)<<6|(r)<<4|(q)<<2|(p)))))
|
||||
|
||||
#define vec4i_swizzle1(v,p,q,r,s) \
|
||||
(_mm_shuffle_epi32( v, ((s)<<6|(r)<<4|(q)<<2|(p))))
|
||||
|
||||
#define vec2d_swizzle1(v,p,q) \
|
||||
(_mm_castsi128_pd(_mm_shuffle_epi32( _mm_castpd_si128(v), ((q*2+1)<<6|(q*2)<<4|(p*2+1)<<2|(p*2)))))
|
||||
|
||||
#define vec4f_swizzle2(a,b,p,q,r,s) \
|
||||
(_mm_shuffle_ps( (a), (b), ((s)<<6|(r)<<4|(q)<<2|(p))))
|
||||
|
||||
#define vec4i_swizzle2(a,b,p,q,r,s) \
|
||||
(_mm_castps_si128( (_mm_shuffle_ps( _mm_castsi128_ps(a), _mm_castsi128_ps(b), ((s)<<6|(r)<<4|(q)<<2|(p))))))
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
|
||||
const Packet4f p4f_##NAME = pset1<Packet4f>(X)
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
|
||||
const Packet2d p2d_##NAME = pset1<Packet2d>(X)
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
|
||||
const Packet4f p4f_##NAME = _mm_castsi128_ps(pset1<Packet4i>(X))
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
|
||||
const Packet4i p4i_##NAME = pset1<Packet4i>(X)
|
||||
|
||||
|
||||
// Use the packet_traits defined in AVX/PacketMath.h instead if we're going
|
||||
// to leverage AVX instructions.
|
||||
#ifndef EIGEN_VECTORIZE_AVX
|
||||
template<> struct packet_traits<float> : default_packet_traits
|
||||
{
|
||||
typedef Packet4f type;
|
||||
typedef Packet4f half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=4,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasDiv = 1,
|
||||
HasSin = EIGEN_FAST_MATH,
|
||||
HasCos = EIGEN_FAST_MATH,
|
||||
HasLog = 1,
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasTanh = EIGEN_FAST_MATH,
|
||||
HasBlend = 1
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
,
|
||||
HasRound = 1,
|
||||
HasFloor = 1,
|
||||
HasCeil = 1
|
||||
#endif
|
||||
};
|
||||
};
|
||||
template<> struct packet_traits<double> : default_packet_traits
|
||||
{
|
||||
typedef Packet2d type;
|
||||
typedef Packet2d half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=2,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasDiv = 1,
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasBlend = 1
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
,
|
||||
HasRound = 1,
|
||||
HasFloor = 1,
|
||||
HasCeil = 1
|
||||
#endif
|
||||
};
|
||||
};
|
||||
#endif
|
||||
template<> struct packet_traits<int> : default_packet_traits
|
||||
{
|
||||
typedef Packet4i type;
|
||||
typedef Packet4i half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=4,
|
||||
|
||||
HasBlend = 1
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
|
||||
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
|
||||
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
|
||||
|
||||
#ifndef EIGEN_VECTORIZE_AVX
|
||||
template<> struct scalar_div_cost<float,true> { enum { value = 7 }; };
|
||||
template<> struct scalar_div_cost<double,true> { enum { value = 8 }; };
|
||||
#endif
|
||||
|
||||
#if EIGEN_COMP_MSVC==1500
|
||||
// Workaround MSVC 9 internal compiler error.
|
||||
// TODO: It has been detected with win64 builds (amd64), so let's check whether it also happens in 32bits+SSE mode
|
||||
// TODO: let's check whether there does not exist a better fix, like adding a pset0() function. (it crashed on pset1(0)).
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps(from,from,from,from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set_epi32(from,from,from,from); }
|
||||
#else
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps1(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set1_epi32(from); }
|
||||
#endif
|
||||
|
||||
// GCC generates a shufps instruction for _mm_set1_ps/_mm_load1_ps instead of the more efficient pshufd instruction.
|
||||
// However, using inrinsics for pset1 makes gcc to generate crappy code in some cases (see bug 203)
|
||||
// Using inline assembly is also not an option because then gcc fails to reorder properly the instructions.
|
||||
// Therefore, we introduced the pload1 functions to be used in product kernels for which bug 203 does not apply.
|
||||
// Also note that with AVX, we want it to generate a vbroadcastss.
|
||||
#if EIGEN_COMP_GNUC_STRICT && (!defined __AVX__)
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pload1<Packet4f>(const float *from) {
|
||||
return vec4f_swizzle1(_mm_load_ss(from),0,0,0,0);
|
||||
}
|
||||
#endif
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return _mm_add_epi32(pset1<Packet4i>(a),_mm_set_epi32(3,2,1,0)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_add_pd(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_add_epi32(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_sub_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_sub_pd(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_sub_epi32(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)
|
||||
{
|
||||
const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
|
||||
return _mm_xor_ps(a,mask);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a)
|
||||
{
|
||||
const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x80000000,0x0,0x80000000));
|
||||
return _mm_xor_pd(a,mask);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a)
|
||||
{
|
||||
return psub(Packet4i(_mm_setr_epi32(0,0,0,0)), a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_mul_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
return _mm_mullo_epi32(a,b);
|
||||
#else
|
||||
// this version is slightly faster than 4 scalar products
|
||||
return vec4i_swizzle1(
|
||||
vec4i_swizzle2(
|
||||
_mm_mul_epu32(a,b),
|
||||
_mm_mul_epu32(vec4i_swizzle1(a,1,0,3,2),
|
||||
vec4i_swizzle1(b,1,0,3,2)),
|
||||
0,2,0,2),
|
||||
0,2,1,3);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_div_pd(a,b); }
|
||||
|
||||
// for some weird raisons, it has to be overloaded for packet of integers
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }
|
||||
#ifdef __FMA__
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return _mm_fmadd_ps(a,b,c); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return _mm_fmadd_pd(a,b,c); }
|
||||
#endif
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_min_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
return _mm_min_epi32(a,b);
|
||||
#else
|
||||
// after some bench, this version *is* faster than a scalar implementation
|
||||
Packet4i mask = _mm_cmplt_epi32(a,b);
|
||||
return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
return _mm_max_epi32(a,b);
|
||||
#else
|
||||
// after some bench, this version *is* faster than a scalar implementation
|
||||
Packet4i mask = _mm_cmpgt_epi32(a,b);
|
||||
return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) { return _mm_round_ps(a, 0); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return _mm_round_pd(a, 0); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return _mm_ceil_ps(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return _mm_ceil_pd(a); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return _mm_floor_ps(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return _mm_floor_pd(a); }
|
||||
#endif
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_and_pd(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_and_si128(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_or_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_or_pd(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_or_si128(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_xor_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_xor_pd(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_xor_si128(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_andnot_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(a,b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_ps(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const __m128i*>(from)); }
|
||||
|
||||
#if EIGEN_COMP_MSVC
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) {
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD
|
||||
#if (EIGEN_COMP_MSVC==1600)
|
||||
// NOTE Some version of MSVC10 generates bad code when using _mm_loadu_ps
|
||||
// (i.e., it does not generate an unaligned load!!
|
||||
__m128 res = _mm_loadl_pi(_mm_set1_ps(0.0f), (const __m64*)(from));
|
||||
res = _mm_loadh_pi(res, (const __m64*)(from+2));
|
||||
return res;
|
||||
#else
|
||||
return _mm_loadu_ps(from);
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
// NOTE: with the code below, MSVC's compiler crashes!
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
|
||||
{
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD
|
||||
return _mm_loadu_ps(from);
|
||||
}
|
||||
#endif
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
|
||||
{
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD
|
||||
return _mm_loadu_pd(from);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
|
||||
{
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD
|
||||
return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));
|
||||
}
|
||||
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
|
||||
{
|
||||
return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd(reinterpret_cast<const double*>(from))), 0, 0, 1, 1);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
|
||||
{ return pset1<Packet2d>(from[0]); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
|
||||
{
|
||||
Packet4i tmp;
|
||||
tmp = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(from));
|
||||
return vec4i_swizzle1(tmp, 0, 0, 1, 1);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to), from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_pd(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_ps(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
|
||||
{
|
||||
return _mm_set_ps(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
|
||||
{
|
||||
return _mm_set_pd(from[1*stride], from[0*stride]);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)
|
||||
{
|
||||
return _mm_set_epi32(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
|
||||
{
|
||||
to[stride*0] = _mm_cvtss_f32(from);
|
||||
to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 1));
|
||||
to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 2));
|
||||
to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 3));
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
|
||||
{
|
||||
to[stride*0] = _mm_cvtsd_f64(from);
|
||||
to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(from, from, 1));
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride)
|
||||
{
|
||||
to[stride*0] = _mm_cvtsi128_si32(from);
|
||||
to[stride*1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1));
|
||||
to[stride*2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));
|
||||
to[stride*3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3));
|
||||
}
|
||||
|
||||
// some compilers might be tempted to perform multiple moves instead of using a vector path.
|
||||
template<> EIGEN_STRONG_INLINE void pstore1<Packet4f>(float* to, const float& a)
|
||||
{
|
||||
Packet4f pa = _mm_set_ss(a);
|
||||
pstore(to, Packet4f(vec4f_swizzle1(pa,0,0,0,0)));
|
||||
}
|
||||
// some compilers might be tempted to perform multiple moves instead of using a vector path.
|
||||
template<> EIGEN_STRONG_INLINE void pstore1<Packet2d>(double* to, const double& a)
|
||||
{
|
||||
Packet2d pa = _mm_set_sd(a);
|
||||
pstore(to, Packet2d(vec2d_swizzle1(pa,0,0)));
|
||||
}
|
||||
|
||||
#if EIGEN_COMP_PGI
|
||||
typedef const void * SsePrefetchPtrType;
|
||||
#else
|
||||
typedef const char * SsePrefetchPtrType;
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_VECTORIZE_AVX
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
|
||||
#endif
|
||||
|
||||
#if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64
|
||||
// The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010
|
||||
// Direct of the struct members fixed bug #62.
|
||||
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { return a.m128_f32[0]; }
|
||||
template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return a.m128d_f64[0]; }
|
||||
template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
|
||||
#elif EIGEN_COMP_MSVC_STRICT
|
||||
// The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010
|
||||
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float x = _mm_cvtss_f32(a); return x; }
|
||||
template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double x = _mm_cvtsd_f64(a); return x; }
|
||||
template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
|
||||
#else
|
||||
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { return _mm_cvtss_f32(a); }
|
||||
template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return _mm_cvtsd_f64(a); }
|
||||
template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { return _mm_cvtsi128_si32(a); }
|
||||
#endif
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
|
||||
{ return _mm_shuffle_ps(a,a,0x1B); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
|
||||
{ return _mm_shuffle_pd(a,a,0x1); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
|
||||
{ return _mm_shuffle_epi32(a,0x1B); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a)
|
||||
{
|
||||
const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
|
||||
return _mm_and_ps(a,mask);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a)
|
||||
{
|
||||
const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
|
||||
return _mm_and_pd(a,mask);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSSE3
|
||||
return _mm_abs_epi32(a);
|
||||
#else
|
||||
Packet4i aux = _mm_srai_epi32(a,31);
|
||||
return _mm_sub_epi32(_mm_xor_si128(a,aux),aux);
|
||||
#endif
|
||||
}
|
||||
|
||||
// with AVX, the default implementations based on pload1 are faster
|
||||
#ifndef __AVX__
|
||||
template<> EIGEN_STRONG_INLINE void
|
||||
pbroadcast4<Packet4f>(const float *a,
|
||||
Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
|
||||
{
|
||||
a3 = pload<Packet4f>(a);
|
||||
a0 = vec4f_swizzle1(a3, 0,0,0,0);
|
||||
a1 = vec4f_swizzle1(a3, 1,1,1,1);
|
||||
a2 = vec4f_swizzle1(a3, 2,2,2,2);
|
||||
a3 = vec4f_swizzle1(a3, 3,3,3,3);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE void
|
||||
pbroadcast4<Packet2d>(const double *a,
|
||||
Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
a0 = _mm_loaddup_pd(a+0);
|
||||
a1 = _mm_loaddup_pd(a+1);
|
||||
a2 = _mm_loaddup_pd(a+2);
|
||||
a3 = _mm_loaddup_pd(a+3);
|
||||
#else
|
||||
a1 = pload<Packet2d>(a);
|
||||
a0 = vec2d_swizzle1(a1, 0,0);
|
||||
a1 = vec2d_swizzle1(a1, 1,1);
|
||||
a3 = pload<Packet2d>(a+2);
|
||||
a2 = vec2d_swizzle1(a3, 0,0);
|
||||
a3 = vec2d_swizzle1(a3, 1,1);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
EIGEN_STRONG_INLINE void punpackp(Packet4f* vecs)
|
||||
{
|
||||
vecs[1] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x55));
|
||||
vecs[2] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xAA));
|
||||
vecs[3] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xFF));
|
||||
vecs[0] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x00));
|
||||
}
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
|
||||
{
|
||||
return _mm_hadd_ps(_mm_hadd_ps(vecs[0], vecs[1]),_mm_hadd_ps(vecs[2], vecs[3]));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
|
||||
{
|
||||
return _mm_hadd_pd(vecs[0], vecs[1]);
|
||||
}
|
||||
|
||||
#else
|
||||
template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
|
||||
{
|
||||
Packet4f tmp0, tmp1, tmp2;
|
||||
tmp0 = _mm_unpacklo_ps(vecs[0], vecs[1]);
|
||||
tmp1 = _mm_unpackhi_ps(vecs[0], vecs[1]);
|
||||
tmp2 = _mm_unpackhi_ps(vecs[2], vecs[3]);
|
||||
tmp0 = _mm_add_ps(tmp0, tmp1);
|
||||
tmp1 = _mm_unpacklo_ps(vecs[2], vecs[3]);
|
||||
tmp1 = _mm_add_ps(tmp1, tmp2);
|
||||
tmp2 = _mm_movehl_ps(tmp1, tmp0);
|
||||
tmp0 = _mm_movelh_ps(tmp0, tmp1);
|
||||
return _mm_add_ps(tmp0, tmp2);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
|
||||
{
|
||||
return _mm_add_pd(_mm_unpacklo_pd(vecs[0], vecs[1]), _mm_unpackhi_pd(vecs[0], vecs[1]));
|
||||
}
|
||||
#endif // SSE3
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
// Disable SSE3 _mm_hadd_pd that is extremely slow on all existing Intel's architectures
|
||||
// (from Nehalem to Haswell)
|
||||
// #ifdef EIGEN_VECTORIZE_SSE3
|
||||
// Packet4f tmp = _mm_add_ps(a, vec4f_swizzle1(a,2,3,2,3));
|
||||
// return pfirst<Packet4f>(_mm_hadd_ps(tmp, tmp));
|
||||
// #else
|
||||
Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a));
|
||||
return pfirst<Packet4f>(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
|
||||
// #endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
|
||||
{
|
||||
// Disable SSE3 _mm_hadd_pd that is extremely slow on all existing Intel's architectures
|
||||
// (from Nehalem to Haswell)
|
||||
// #ifdef EIGEN_VECTORIZE_SSE3
|
||||
// return pfirst<Packet2d>(_mm_hadd_pd(a, a));
|
||||
// #else
|
||||
return pfirst<Packet2d>(_mm_add_sd(a, _mm_unpackhi_pd(a,a)));
|
||||
// #endif
|
||||
}
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_SSSE3
|
||||
template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
|
||||
{
|
||||
return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3]));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
Packet4i tmp0 = _mm_hadd_epi32(a,a);
|
||||
return pfirst<Packet4i>(_mm_hadd_epi32(tmp0,tmp0));
|
||||
}
|
||||
#else
|
||||
template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
Packet4i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a));
|
||||
return pfirst(tmp) + pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
|
||||
{
|
||||
Packet4i tmp0, tmp1, tmp2;
|
||||
tmp0 = _mm_unpacklo_epi32(vecs[0], vecs[1]);
|
||||
tmp1 = _mm_unpackhi_epi32(vecs[0], vecs[1]);
|
||||
tmp2 = _mm_unpackhi_epi32(vecs[2], vecs[3]);
|
||||
tmp0 = _mm_add_epi32(tmp0, tmp1);
|
||||
tmp1 = _mm_unpacklo_epi32(vecs[2], vecs[3]);
|
||||
tmp1 = _mm_add_epi32(tmp1, tmp2);
|
||||
tmp2 = _mm_unpacklo_epi64(tmp0, tmp1);
|
||||
tmp0 = _mm_unpackhi_epi64(tmp0, tmp1);
|
||||
return _mm_add_epi32(tmp0, tmp2);
|
||||
}
|
||||
#endif
|
||||
// Other reduction functions:
|
||||
|
||||
// mul
|
||||
template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f tmp = _mm_mul_ps(a, _mm_movehl_ps(a,a));
|
||||
return pfirst<Packet4f>(_mm_mul_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
|
||||
{
|
||||
return pfirst<Packet2d>(_mm_mul_sd(a, _mm_unpackhi_pd(a,a)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
// after some experiments, it is seems this is the fastest way to implement it
|
||||
// for GCC (eg., reusing pmul is very slow !)
|
||||
// TODO try to call _mm_mul_epu32 directly
|
||||
EIGEN_ALIGN16 int aux[4];
|
||||
pstore(aux, a);
|
||||
return (aux[0] * aux[1]) * (aux[2] * aux[3]);;
|
||||
}
|
||||
|
||||
// min
|
||||
template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f tmp = _mm_min_ps(a, _mm_movehl_ps(a,a));
|
||||
return pfirst<Packet4f>(_mm_min_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
|
||||
{
|
||||
return pfirst<Packet2d>(_mm_min_sd(a, _mm_unpackhi_pd(a,a)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
Packet4i tmp = _mm_min_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
|
||||
return pfirst<Packet4i>(_mm_min_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
|
||||
#else
|
||||
// after some experiments, it is seems this is the fastest way to implement it
|
||||
// for GCC (eg., it does not like using std::min after the pstore !!)
|
||||
EIGEN_ALIGN16 int aux[4];
|
||||
pstore(aux, a);
|
||||
int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
|
||||
int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
|
||||
return aux0<aux2 ? aux0 : aux2;
|
||||
#endif // EIGEN_VECTORIZE_SSE4_1
|
||||
}
|
||||
|
||||
// max
|
||||
template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f tmp = _mm_max_ps(a, _mm_movehl_ps(a,a));
|
||||
return pfirst<Packet4f>(_mm_max_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
|
||||
{
|
||||
return pfirst<Packet2d>(_mm_max_sd(a, _mm_unpackhi_pd(a,a)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
Packet4i tmp = _mm_max_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
|
||||
return pfirst<Packet4i>(_mm_max_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
|
||||
#else
|
||||
// after some experiments, it is seems this is the fastest way to implement it
|
||||
// for GCC (eg., it does not like using std::min after the pstore !!)
|
||||
EIGEN_ALIGN16 int aux[4];
|
||||
pstore(aux, a);
|
||||
int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
|
||||
int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
|
||||
return aux0>aux2 ? aux0 : aux2;
|
||||
#endif // EIGEN_VECTORIZE_SSE4_1
|
||||
}
|
||||
|
||||
#if EIGEN_COMP_GNUC
|
||||
// template <> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)
|
||||
// {
|
||||
// Packet4f res = b;
|
||||
// asm("mulps %[a], %[b] \n\taddps %[c], %[b]" : [b] "+x" (res) : [a] "x" (a), [c] "x" (c));
|
||||
// return res;
|
||||
// }
|
||||
// EIGEN_STRONG_INLINE Packet4i _mm_alignr_epi8(const Packet4i& a, const Packet4i& b, const int i)
|
||||
// {
|
||||
// Packet4i res = a;
|
||||
// asm("palignr %[i], %[a], %[b] " : [b] "+x" (res) : [a] "x" (a), [i] "i" (i));
|
||||
// return res;
|
||||
// }
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_SSSE3
|
||||
// SSSE3 versions
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet4f>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
|
||||
{
|
||||
if (Offset!=0)
|
||||
first = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(second), _mm_castps_si128(first), Offset*4));
|
||||
}
|
||||
};
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet4i>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
|
||||
{
|
||||
if (Offset!=0)
|
||||
first = _mm_alignr_epi8(second,first, Offset*4);
|
||||
}
|
||||
};
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet2d>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
|
||||
{
|
||||
if (Offset==1)
|
||||
first = _mm_castsi128_pd(_mm_alignr_epi8(_mm_castpd_si128(second), _mm_castpd_si128(first), 8));
|
||||
}
|
||||
};
|
||||
#else
|
||||
// SSE2 versions
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet4f>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
|
||||
{
|
||||
if (Offset==1)
|
||||
{
|
||||
first = _mm_move_ss(first,second);
|
||||
first = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(first),0x39));
|
||||
}
|
||||
else if (Offset==2)
|
||||
{
|
||||
first = _mm_movehl_ps(first,first);
|
||||
first = _mm_movelh_ps(first,second);
|
||||
}
|
||||
else if (Offset==3)
|
||||
{
|
||||
first = _mm_move_ss(first,second);
|
||||
first = _mm_shuffle_ps(first,second,0x93);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet4i>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
|
||||
{
|
||||
if (Offset==1)
|
||||
{
|
||||
first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
|
||||
first = _mm_shuffle_epi32(first,0x39);
|
||||
}
|
||||
else if (Offset==2)
|
||||
{
|
||||
first = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(first)));
|
||||
first = _mm_castps_si128(_mm_movelh_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
|
||||
}
|
||||
else if (Offset==3)
|
||||
{
|
||||
first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
|
||||
first = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second),0x93));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet2d>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
|
||||
{
|
||||
if (Offset==1)
|
||||
{
|
||||
first = _mm_castps_pd(_mm_movehl_ps(_mm_castpd_ps(first),_mm_castpd_ps(first)));
|
||||
first = _mm_castps_pd(_mm_movelh_ps(_mm_castpd_ps(first),_mm_castpd_ps(second)));
|
||||
}
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet4f,4>& kernel) {
|
||||
_MM_TRANSPOSE4_PS(kernel.packet[0], kernel.packet[1], kernel.packet[2], kernel.packet[3]);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet2d,2>& kernel) {
|
||||
__m128d tmp = _mm_unpackhi_pd(kernel.packet[0], kernel.packet[1]);
|
||||
kernel.packet[0] = _mm_unpacklo_pd(kernel.packet[0], kernel.packet[1]);
|
||||
kernel.packet[1] = tmp;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet4i,4>& kernel) {
|
||||
__m128i T0 = _mm_unpacklo_epi32(kernel.packet[0], kernel.packet[1]);
|
||||
__m128i T1 = _mm_unpacklo_epi32(kernel.packet[2], kernel.packet[3]);
|
||||
__m128i T2 = _mm_unpackhi_epi32(kernel.packet[0], kernel.packet[1]);
|
||||
__m128i T3 = _mm_unpackhi_epi32(kernel.packet[2], kernel.packet[3]);
|
||||
|
||||
kernel.packet[0] = _mm_unpacklo_epi64(T0, T1);
|
||||
kernel.packet[1] = _mm_unpackhi_epi64(T0, T1);
|
||||
kernel.packet[2] = _mm_unpacklo_epi64(T2, T3);
|
||||
kernel.packet[3] = _mm_unpackhi_epi64(T2, T3);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i select = _mm_set_epi32(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
|
||||
__m128i false_mask = _mm_cmpeq_epi32(select, zero);
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
return _mm_blendv_epi8(thenPacket, elsePacket, false_mask);
|
||||
#else
|
||||
return _mm_or_si128(_mm_andnot_si128(false_mask, thenPacket), _mm_and_si128(false_mask, elsePacket));
|
||||
#endif
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
|
||||
const __m128 zero = _mm_setzero_ps();
|
||||
const __m128 select = _mm_set_ps(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
|
||||
__m128 false_mask = _mm_cmpeq_ps(select, zero);
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
return _mm_blendv_ps(thenPacket, elsePacket, false_mask);
|
||||
#else
|
||||
return _mm_or_ps(_mm_andnot_ps(false_mask, thenPacket), _mm_and_ps(false_mask, elsePacket));
|
||||
#endif
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
|
||||
const __m128d zero = _mm_setzero_pd();
|
||||
const __m128d select = _mm_set_pd(ifPacket.select[1], ifPacket.select[0]);
|
||||
__m128d false_mask = _mm_cmpeq_pd(select, zero);
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
return _mm_blendv_pd(thenPacket, elsePacket, false_mask);
|
||||
#else
|
||||
return _mm_or_pd(_mm_andnot_pd(false_mask, thenPacket), _mm_and_pd(false_mask, elsePacket));
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pinsertfirst(const Packet4f& a, float b)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
return _mm_blend_ps(a,pset1<Packet4f>(b),1);
|
||||
#else
|
||||
return _mm_move_ss(a, _mm_load_ss(&b));
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pinsertfirst(const Packet2d& a, double b)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
return _mm_blend_pd(a,pset1<Packet2d>(b),1);
|
||||
#else
|
||||
return _mm_move_sd(a, _mm_load_sd(&b));
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pinsertlast(const Packet4f& a, float b)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
return _mm_blend_ps(a,pset1<Packet4f>(b),(1<<3));
|
||||
#else
|
||||
const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x0,0x0,0x0,0xFFFFFFFF));
|
||||
return _mm_or_ps(_mm_andnot_ps(mask, a), _mm_and_ps(mask, pset1<Packet4f>(b)));
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pinsertlast(const Packet2d& a, double b)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
return _mm_blend_pd(a,pset1<Packet2d>(b),(1<<1));
|
||||
#else
|
||||
const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x0,0xFFFFFFFF,0xFFFFFFFF));
|
||||
return _mm_or_pd(_mm_andnot_pd(mask, a), _mm_and_pd(mask, pset1<Packet2d>(b)));
|
||||
#endif
|
||||
}
|
||||
|
||||
// Scalar path for pmadd with FMA to ensure consistency with vectorized path.
|
||||
#ifdef __FMA__
|
||||
template<> EIGEN_STRONG_INLINE float pmadd(const float& a, const float& b, const float& c) {
|
||||
return ::fmaf(a,b,c);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE double pmadd(const double& a, const double& b, const double& c) {
|
||||
return ::fma(a,b,c);
|
||||
}
|
||||
#endif
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#if EIGEN_COMP_PGI
|
||||
// PGI++ does not define the following intrinsics in C++ mode.
|
||||
static inline __m128 _mm_castpd_ps (__m128d x) { return reinterpret_cast<__m128&>(x); }
|
||||
static inline __m128i _mm_castpd_si128(__m128d x) { return reinterpret_cast<__m128i&>(x); }
|
||||
static inline __m128d _mm_castps_pd (__m128 x) { return reinterpret_cast<__m128d&>(x); }
|
||||
static inline __m128i _mm_castps_si128(__m128 x) { return reinterpret_cast<__m128i&>(x); }
|
||||
static inline __m128 _mm_castsi128_ps(__m128i x) { return reinterpret_cast<__m128&>(x); }
|
||||
static inline __m128d _mm_castsi128_pd(__m128i x) { return reinterpret_cast<__m128d&>(x); }
|
||||
#endif
|
||||
|
||||
#endif // EIGEN_PACKET_MATH_SSE_H
|
||||
77
external/include/eigen3/Eigen/src/Core/arch/SSE/TypeCasting.h
vendored
Normal file
77
external/include/eigen3/Eigen/src/Core/arch/SSE/TypeCasting.h
vendored
Normal file
@@ -0,0 +1,77 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_TYPE_CASTING_SSE_H
|
||||
#define EIGEN_TYPE_CASTING_SSE_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
#ifndef EIGEN_VECTORIZE_AVX
|
||||
template <>
|
||||
struct type_casting_traits<float, int> {
|
||||
enum {
|
||||
VectorizedCast = 1,
|
||||
SrcCoeffRatio = 1,
|
||||
TgtCoeffRatio = 1
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<int, float> {
|
||||
enum {
|
||||
VectorizedCast = 1,
|
||||
SrcCoeffRatio = 1,
|
||||
TgtCoeffRatio = 1
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<double, float> {
|
||||
enum {
|
||||
VectorizedCast = 1,
|
||||
SrcCoeffRatio = 2,
|
||||
TgtCoeffRatio = 1
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<float, double> {
|
||||
enum {
|
||||
VectorizedCast = 1,
|
||||
SrcCoeffRatio = 1,
|
||||
TgtCoeffRatio = 2
|
||||
};
|
||||
};
|
||||
#endif
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
|
||||
return _mm_cvttps_epi32(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
|
||||
return _mm_cvtepi32_ps(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {
|
||||
return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) {
|
||||
// Simply discard the second half of the input
|
||||
return _mm_cvtps_pd(a);
|
||||
}
|
||||
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_TYPE_CASTING_SSE_H
|
||||
397
external/include/eigen3/Eigen/src/Core/arch/ZVector/Complex.h
vendored
Normal file
397
external/include/eigen3/Eigen/src/Core/arch/ZVector/Complex.h
vendored
Normal file
@@ -0,0 +1,397 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2016 Konstantinos Margaritis <markos@freevec.org>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_COMPLEX32_ALTIVEC_H
|
||||
#define EIGEN_COMPLEX32_ALTIVEC_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||
static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);//{ 0x8000000000000000, 0x0000000000000000 };
|
||||
|
||||
struct Packet1cd
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd() {}
|
||||
EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
|
||||
Packet2d v;
|
||||
};
|
||||
|
||||
struct Packet2cf
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf() {}
|
||||
EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
|
||||
union {
|
||||
Packet4f v;
|
||||
Packet1cd cd[2];
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
||||
{
|
||||
typedef Packet2cf type;
|
||||
typedef Packet2cf half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = 2,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasNegate = 1,
|
||||
HasAbs = 0,
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
HasBlend = 1,
|
||||
HasSetLinear = 0
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
||||
{
|
||||
typedef Packet1cd type;
|
||||
typedef Packet1cd half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = 1,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasNegate = 1,
|
||||
HasAbs = 0,
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
HasSetLinear = 0
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
|
||||
template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
|
||||
|
||||
/* Forward declaration */
|
||||
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel);
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
|
||||
{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
|
||||
{
|
||||
Packet2cf res;
|
||||
res.cd[0] = Packet1cd(vec_ld2f((const float *)&from));
|
||||
res.cd[1] = res.cd[0];
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
|
||||
{
|
||||
std::complex<float> EIGEN_ALIGN16 af[2];
|
||||
af[0] = from[0*stride];
|
||||
af[1] = from[1*stride];
|
||||
return pload<Packet2cf>(af);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride EIGEN_UNUSED)
|
||||
{
|
||||
return pload<Packet1cd>(from);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
|
||||
{
|
||||
std::complex<float> EIGEN_ALIGN16 af[2];
|
||||
pstore<std::complex<float> >((std::complex<float> *) af, from);
|
||||
to[0*stride] = af[0];
|
||||
to[1*stride] = af[1];
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride EIGEN_UNUSED)
|
||||
{
|
||||
pstore<std::complex<double> >(to, from);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd<Packet4f>(a.v, b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub<Packet4f>(a.v, b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(Packet4f(a.v))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, (Packet2d)p2ul_CONJ_XOR2)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
|
||||
{
|
||||
Packet2cf res;
|
||||
res.v.v4f[0] = pconj(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[0]))).v;
|
||||
res.v.v4f[1] = pconj(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[1]))).v;
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
||||
{
|
||||
Packet2d a_re, a_im, v1, v2;
|
||||
|
||||
// Permute and multiply the real parts of a and b
|
||||
a_re = vec_perm(a.v, a.v, p16uc_PSET64_HI);
|
||||
// Get the imaginary parts of a
|
||||
a_im = vec_perm(a.v, a.v, p16uc_PSET64_LO);
|
||||
// multiply a_re * b
|
||||
v1 = vec_madd(a_re, b.v, p2d_ZERO);
|
||||
// multiply a_im * b and get the conjugate result
|
||||
v2 = vec_madd(a_im, b.v, p2d_ZERO);
|
||||
v2 = (Packet2d) vec_sld((Packet4ui)v2, (Packet4ui)v2, 8);
|
||||
v2 = (Packet2d) vec_xor((Packet2d)v2, (Packet2d) p2ul_CONJ_XOR1);
|
||||
|
||||
return Packet1cd(v1 + v2);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
Packet2cf res;
|
||||
res.v.v4f[0] = pmul(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[0])), Packet1cd(reinterpret_cast<Packet2d>(b.v.v4f[0]))).v;
|
||||
res.v.v4f[1] = pmul(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[1])), Packet1cd(reinterpret_cast<Packet2d>(b.v.v4f[1]))).v;
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand<Packet4f>(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por<Packet4f>(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pxor<Packet4f>(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pandnot<Packet4f>(a.v,b.v)); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
|
||||
{
|
||||
std::complex<double> EIGEN_ALIGN16 res;
|
||||
pstore<std::complex<double> >(&res, a);
|
||||
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
std::complex<float> EIGEN_ALIGN16 res[2];
|
||||
pstore<std::complex<float> >(res, a);
|
||||
|
||||
return res[0];
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
|
||||
{
|
||||
Packet2cf res;
|
||||
res.cd[0] = a.cd[1];
|
||||
res.cd[1] = a.cd[0];
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
|
||||
{
|
||||
return pfirst(a);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
std::complex<float> res;
|
||||
Packet1cd b = padd<Packet1cd>(a.cd[0], a.cd[1]);
|
||||
vec_st2f(b.v, (float*)&res);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)
|
||||
{
|
||||
return vecs[0];
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
|
||||
{
|
||||
PacketBlock<Packet2cf,2> transpose;
|
||||
transpose.packet[0] = vecs[0];
|
||||
transpose.packet[1] = vecs[1];
|
||||
ptranspose(transpose);
|
||||
|
||||
return padd<Packet2cf>(transpose.packet[0], transpose.packet[1]);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
|
||||
{
|
||||
return pfirst(a);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
|
||||
{
|
||||
std::complex<float> res;
|
||||
Packet1cd b = pmul<Packet1cd>(a.cd[0], a.cd[1]);
|
||||
vec_st2f(b.v, (float*)&res);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet1cd>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
|
||||
{
|
||||
// FIXME is it sure we never have to align a Packet1cd?
|
||||
// Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
|
||||
}
|
||||
};
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet2cf>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
|
||||
{
|
||||
if (Offset == 1) {
|
||||
first.cd[0] = first.cd[1];
|
||||
first.cd[1] = second.cd[0];
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
|
||||
{
|
||||
return internal::pmul(a, pconj(b));
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
|
||||
{
|
||||
return internal::pmul(pconj(a), b);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
|
||||
{
|
||||
return pconj(internal::pmul(a, b));
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
||||
{
|
||||
return internal::pmul(a, pconj(b));
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
||||
{
|
||||
return internal::pmul(pconj(a), b);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
|
||||
{ return padd(pmul(x,y),c); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
|
||||
{
|
||||
return pconj(internal::pmul(a, b));
|
||||
}
|
||||
};
|
||||
|
||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
|
||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
||||
{
|
||||
// TODO optimize it for AltiVec
|
||||
Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
|
||||
Packet2d s = vec_madd(b.v, b.v, p2d_ZERO_);
|
||||
return Packet1cd(pdiv(res.v, s + vec_perm(s, s, p16uc_REVERSE64)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
// TODO optimize it for AltiVec
|
||||
Packet2cf res;
|
||||
res.cd[0] = pdiv<Packet1cd>(a.cd[0], b.cd[0]);
|
||||
res.cd[1] = pdiv<Packet1cd>(a.cd[1], b.cd[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
|
||||
{
|
||||
return Packet1cd(preverse(Packet2d(x.v)));
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pcplxflip/*<Packet2cf>*/(const Packet2cf& x)
|
||||
{
|
||||
Packet2cf res;
|
||||
res.cd[0] = pcplxflip(x.cd[0]);
|
||||
res.cd[1] = pcplxflip(x.cd[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
|
||||
{
|
||||
Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
|
||||
kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
|
||||
kernel.packet[0].v = tmp;
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
|
||||
{
|
||||
Packet1cd tmp = kernel.packet[0].cd[1];
|
||||
kernel.packet[0].cd[1] = kernel.packet[1].cd[0];
|
||||
kernel.packet[1].cd[0] = tmp;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
|
||||
Packet2cf result;
|
||||
const Selector<4> ifPacket4 = { ifPacket.select[0], ifPacket.select[0], ifPacket.select[1], ifPacket.select[1] };
|
||||
result.v = pblend<Packet4f>(ifPacket4, thenPacket.v, elsePacket.v);
|
||||
return result;
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_COMPLEX32_ALTIVEC_H
|
||||
137
external/include/eigen3/Eigen/src/Core/arch/ZVector/MathFunctions.h
vendored
Normal file
137
external/include/eigen3/Eigen/src/Core/arch/ZVector/MathFunctions.h
vendored
Normal file
@@ -0,0 +1,137 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2007 Julien Pommier
|
||||
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
// Copyright (C) 2016 Konstantinos Margaritis <markos@freevec.org>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
/* The sin, cos, exp, and log functions of this file come from
|
||||
* Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
|
||||
*/
|
||||
|
||||
#ifndef EIGEN_MATH_FUNCTIONS_ALTIVEC_H
|
||||
#define EIGEN_MATH_FUNCTIONS_ALTIVEC_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(exp_hi, 709.437);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
|
||||
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
|
||||
static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet2d pexp<Packet2d>(const Packet2d& _x)
|
||||
{
|
||||
Packet2d x = _x;
|
||||
|
||||
Packet2d tmp, fx;
|
||||
Packet2l emm0;
|
||||
|
||||
// clamp x
|
||||
x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);
|
||||
/* express exp(x) as exp(g + n*log(2)) */
|
||||
fx = pmadd(p2d_cephes_LOG2EF, x, p2d_half);
|
||||
|
||||
fx = vec_floor(fx);
|
||||
|
||||
tmp = pmul(fx, p2d_cephes_exp_C1);
|
||||
Packet2d z = pmul(fx, p2d_cephes_exp_C2);
|
||||
x = psub(x, tmp);
|
||||
x = psub(x, z);
|
||||
|
||||
Packet2d x2 = pmul(x,x);
|
||||
|
||||
Packet2d px = p2d_cephes_exp_p0;
|
||||
px = pmadd(px, x2, p2d_cephes_exp_p1);
|
||||
px = pmadd(px, x2, p2d_cephes_exp_p2);
|
||||
px = pmul (px, x);
|
||||
|
||||
Packet2d qx = p2d_cephes_exp_q0;
|
||||
qx = pmadd(qx, x2, p2d_cephes_exp_q1);
|
||||
qx = pmadd(qx, x2, p2d_cephes_exp_q2);
|
||||
qx = pmadd(qx, x2, p2d_cephes_exp_q3);
|
||||
|
||||
x = pdiv(px,psub(qx,px));
|
||||
x = pmadd(p2d_2,x,p2d_1);
|
||||
|
||||
// build 2^n
|
||||
emm0 = vec_ctsl(fx, 0);
|
||||
|
||||
static const Packet2l p2l_1023 = { 1023, 1023 };
|
||||
static const Packet2ul p2ul_52 = { 52, 52 };
|
||||
|
||||
emm0 = emm0 + p2l_1023;
|
||||
emm0 = emm0 << reinterpret_cast<Packet2l>(p2ul_52);
|
||||
|
||||
// Altivec's max & min operators just drop silent NaNs. Check NaNs in
|
||||
// inputs and return them unmodified.
|
||||
Packet2ul isnumber_mask = reinterpret_cast<Packet2ul>(vec_cmpeq(_x, _x));
|
||||
return vec_sel(_x, pmax(pmul(x, reinterpret_cast<Packet2d>(emm0)), _x),
|
||||
isnumber_mask);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f pexp<Packet4f>(const Packet4f& x)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pexp<Packet2d>(x.v4f[0]);
|
||||
res.v4f[1] = pexp<Packet2d>(x.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet2d psqrt<Packet2d>(const Packet2d& x)
|
||||
{
|
||||
return __builtin_s390_vfsqdb(x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f psqrt<Packet4f>(const Packet4f& x)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = psqrt<Packet2d>(x.v4f[0]);
|
||||
res.v4f[1] = psqrt<Packet2d>(x.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet2d prsqrt<Packet2d>(const Packet2d& x) {
|
||||
// Unfortunately we can't use the much faster mm_rqsrt_pd since it only provides an approximation.
|
||||
return pset1<Packet2d>(1.0) / psqrt<Packet2d>(x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f prsqrt<Packet4f>(const Packet4f& x) {
|
||||
Packet4f res;
|
||||
res.v4f[0] = prsqrt<Packet2d>(x.v4f[0]);
|
||||
res.v4f[1] = prsqrt<Packet2d>(x.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_MATH_FUNCTIONS_ALTIVEC_H
|
||||
945
external/include/eigen3/Eigen/src/Core/arch/ZVector/PacketMath.h
vendored
Normal file
945
external/include/eigen3/Eigen/src/Core/arch/ZVector/PacketMath.h
vendored
Normal file
@@ -0,0 +1,945 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2016 Konstantinos Margaritis <markos@freevec.org>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_PACKET_MATH_ZVECTOR_H
|
||||
#define EIGEN_PACKET_MATH_ZVECTOR_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
||||
#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
|
||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16
|
||||
#endif
|
||||
|
||||
typedef __vector int Packet4i;
|
||||
typedef __vector unsigned int Packet4ui;
|
||||
typedef __vector __bool int Packet4bi;
|
||||
typedef __vector short int Packet8i;
|
||||
typedef __vector unsigned char Packet16uc;
|
||||
typedef __vector double Packet2d;
|
||||
typedef __vector unsigned long long Packet2ul;
|
||||
typedef __vector long long Packet2l;
|
||||
|
||||
typedef struct {
|
||||
Packet2d v4f[2];
|
||||
} Packet4f;
|
||||
|
||||
typedef union {
|
||||
int32_t i[4];
|
||||
uint32_t ui[4];
|
||||
int64_t l[2];
|
||||
uint64_t ul[2];
|
||||
double d[2];
|
||||
Packet4i v4i;
|
||||
Packet4ui v4ui;
|
||||
Packet2l v2l;
|
||||
Packet2ul v2ul;
|
||||
Packet2d v2d;
|
||||
} Packet;
|
||||
|
||||
// We don't want to write the same code all the time, but we need to reuse the constants
|
||||
// and it doesn't really work to declare them global, so we define macros instead
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \
|
||||
Packet4i p4i_##NAME = reinterpret_cast<Packet4i>(vec_splat_s32(X))
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_FAST_Packet2d(NAME,X) \
|
||||
Packet2d p2d_##NAME = reinterpret_cast<Packet2d>(vec_splat_s64(X))
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_FAST_Packet2l(NAME,X) \
|
||||
Packet2l p2l_##NAME = reinterpret_cast<Packet2l>(vec_splat_s64(X))
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
|
||||
Packet4i p4i_##NAME = pset1<Packet4i>(X)
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
|
||||
Packet2d p2d_##NAME = pset1<Packet2d>(X)
|
||||
|
||||
#define _EIGEN_DECLARE_CONST_Packet2l(NAME,X) \
|
||||
Packet2l p2l_##NAME = pset1<Packet2l>(X)
|
||||
|
||||
// These constants are endian-agnostic
|
||||
//static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,}
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE, 1); //{ 1, 1, 1, 1}
|
||||
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet2d(ZERO, 0);
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet2l(ZERO, 0);
|
||||
static _EIGEN_DECLARE_CONST_FAST_Packet2l(ONE, 1);
|
||||
|
||||
static Packet2d p2d_ONE = { 1.0, 1.0 };
|
||||
static Packet2d p2d_ZERO_ = { -0.0, -0.0 };
|
||||
|
||||
static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 };
|
||||
static Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 };
|
||||
static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet16uc>(p2d_ZERO), reinterpret_cast<Packet16uc>(p2d_ONE), 8));
|
||||
|
||||
static Packet16uc p16uc_PSET64_HI = { 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
|
||||
static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 };
|
||||
|
||||
// Mask alignment
|
||||
#define _EIGEN_MASK_ALIGNMENT 0xfffffffffffffff0
|
||||
|
||||
#define _EIGEN_ALIGNED_PTR(x) ((std::ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT)
|
||||
|
||||
// Handle endianness properly while loading constants
|
||||
// Define global static constants:
|
||||
|
||||
static Packet16uc p16uc_FORWARD = { 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15 };
|
||||
static Packet16uc p16uc_REVERSE32 = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3 };
|
||||
static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
|
||||
|
||||
static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
|
||||
static Packet16uc p16uc_PSET32_WEVEN = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
|
||||
/*static Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8); //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16};
|
||||
|
||||
static Packet16uc p16uc_PSET64_HI = (Packet16uc) vec_mergeh((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN); //{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };*/
|
||||
static Packet16uc p16uc_PSET64_LO = (Packet16uc) vec_mergel((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN); //{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 };
|
||||
/*static Packet16uc p16uc_TRANSPOSE64_HI = vec_add(p16uc_PSET64_HI, p16uc_HALF64_0_16); //{ 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23};
|
||||
static Packet16uc p16uc_TRANSPOSE64_LO = vec_add(p16uc_PSET64_LO, p16uc_HALF64_0_16); //{ 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31};*/
|
||||
static Packet16uc p16uc_TRANSPOSE64_HI = { 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23};
|
||||
static Packet16uc p16uc_TRANSPOSE64_LO = { 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31};
|
||||
|
||||
//static Packet16uc p16uc_COMPLEX32_REV = vec_sld(p16uc_REVERSE32, p16uc_REVERSE32, 8); //{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 };
|
||||
|
||||
//static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8); //{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
|
||||
|
||||
|
||||
#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
|
||||
#define EIGEN_ZVECTOR_PREFETCH(ADDR) __builtin_prefetch(ADDR);
|
||||
#else
|
||||
#define EIGEN_ZVECTOR_PREFETCH(ADDR) asm( " pfd [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" );
|
||||
#endif
|
||||
|
||||
template<> struct packet_traits<int> : default_packet_traits
|
||||
{
|
||||
typedef Packet4i type;
|
||||
typedef Packet4i half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = 4,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasBlend = 1
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct packet_traits<float> : default_packet_traits
|
||||
{
|
||||
typedef Packet4f type;
|
||||
typedef Packet4f half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=4,
|
||||
HasHalfPacket = 0,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasMin = 1,
|
||||
HasMax = 1,
|
||||
HasAbs = 1,
|
||||
HasSin = 0,
|
||||
HasCos = 0,
|
||||
HasLog = 0,
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasRound = 1,
|
||||
HasFloor = 1,
|
||||
HasCeil = 1,
|
||||
HasNegate = 1,
|
||||
HasBlend = 1
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct packet_traits<double> : default_packet_traits
|
||||
{
|
||||
typedef Packet2d type;
|
||||
typedef Packet2d half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size=2,
|
||||
HasHalfPacket = 1,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasMin = 1,
|
||||
HasMax = 1,
|
||||
HasAbs = 1,
|
||||
HasSin = 0,
|
||||
HasCos = 0,
|
||||
HasLog = 0,
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasRound = 1,
|
||||
HasFloor = 1,
|
||||
HasCeil = 1,
|
||||
HasNegate = 1,
|
||||
HasBlend = 1
|
||||
};
|
||||
};
|
||||
|
||||
template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
|
||||
template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
|
||||
template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
|
||||
|
||||
/* Forward declaration */
|
||||
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4f,4>& kernel);
|
||||
|
||||
inline std::ostream & operator <<(std::ostream & s, const Packet4i & v)
|
||||
{
|
||||
Packet vt;
|
||||
vt.v4i = v;
|
||||
s << vt.i[0] << ", " << vt.i[1] << ", " << vt.i[2] << ", " << vt.i[3];
|
||||
return s;
|
||||
}
|
||||
|
||||
inline std::ostream & operator <<(std::ostream & s, const Packet4ui & v)
|
||||
{
|
||||
Packet vt;
|
||||
vt.v4ui = v;
|
||||
s << vt.ui[0] << ", " << vt.ui[1] << ", " << vt.ui[2] << ", " << vt.ui[3];
|
||||
return s;
|
||||
}
|
||||
|
||||
inline std::ostream & operator <<(std::ostream & s, const Packet2l & v)
|
||||
{
|
||||
Packet vt;
|
||||
vt.v2l = v;
|
||||
s << vt.l[0] << ", " << vt.l[1];
|
||||
return s;
|
||||
}
|
||||
|
||||
inline std::ostream & operator <<(std::ostream & s, const Packet2ul & v)
|
||||
{
|
||||
Packet vt;
|
||||
vt.v2ul = v;
|
||||
s << vt.ul[0] << ", " << vt.ul[1] ;
|
||||
return s;
|
||||
}
|
||||
|
||||
inline std::ostream & operator <<(std::ostream & s, const Packet2d & v)
|
||||
{
|
||||
Packet vt;
|
||||
vt.v2d = v;
|
||||
s << vt.d[0] << ", " << vt.d[1];
|
||||
return s;
|
||||
}
|
||||
|
||||
/* Helper function to simulate a vec_splat_packet4f
|
||||
*/
|
||||
template<int element> EIGEN_STRONG_INLINE Packet4f vec_splat_packet4f(const Packet4f& from)
|
||||
{
|
||||
Packet4f splat;
|
||||
switch (element) {
|
||||
case 0:
|
||||
splat.v4f[0] = vec_splat(from.v4f[0], 0);
|
||||
splat.v4f[1] = splat.v4f[0];
|
||||
break;
|
||||
case 1:
|
||||
splat.v4f[0] = vec_splat(from.v4f[0], 1);
|
||||
splat.v4f[1] = splat.v4f[0];
|
||||
break;
|
||||
case 2:
|
||||
splat.v4f[0] = vec_splat(from.v4f[1], 0);
|
||||
splat.v4f[1] = splat.v4f[0];
|
||||
break;
|
||||
case 3:
|
||||
splat.v4f[0] = vec_splat(from.v4f[1], 1);
|
||||
splat.v4f[1] = splat.v4f[0];
|
||||
break;
|
||||
}
|
||||
return splat;
|
||||
}
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet4i>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
|
||||
{
|
||||
switch (Offset % 4) {
|
||||
case 1:
|
||||
first = vec_sld(first, second, 4); break;
|
||||
case 2:
|
||||
first = vec_sld(first, second, 8); break;
|
||||
case 3:
|
||||
first = vec_sld(first, second, 12); break;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/* This is a tricky one, we have to translate float alignment to vector elements of sizeof double
|
||||
*/
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet4f>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
|
||||
{
|
||||
switch (Offset % 4) {
|
||||
case 1:
|
||||
first.v4f[0] = vec_sld(first.v4f[0], first.v4f[1], 8);
|
||||
first.v4f[1] = vec_sld(first.v4f[1], second.v4f[0], 8);
|
||||
break;
|
||||
case 2:
|
||||
first.v4f[0] = first.v4f[1];
|
||||
first.v4f[1] = second.v4f[0];
|
||||
break;
|
||||
case 3:
|
||||
first.v4f[0] = vec_sld(first.v4f[1], second.v4f[0], 8);
|
||||
first.v4f[1] = vec_sld(second.v4f[0], second.v4f[1], 8);
|
||||
break;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<int Offset>
|
||||
struct palign_impl<Offset,Packet2d>
|
||||
{
|
||||
static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
|
||||
{
|
||||
if (Offset == 1)
|
||||
first = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(first), reinterpret_cast<Packet4i>(second), 8));
|
||||
}
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from)
|
||||
{
|
||||
// FIXME: No intrinsic yet
|
||||
EIGEN_DEBUG_ALIGNED_LOAD
|
||||
Packet *vfrom;
|
||||
vfrom = (Packet *) from;
|
||||
return vfrom->v4i;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
|
||||
{
|
||||
// FIXME: No intrinsic yet
|
||||
EIGEN_DEBUG_ALIGNED_LOAD
|
||||
Packet4f vfrom;
|
||||
vfrom.v4f[0] = vec_ld2f(&from[0]);
|
||||
vfrom.v4f[1] = vec_ld2f(&from[2]);
|
||||
return vfrom;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from)
|
||||
{
|
||||
// FIXME: No intrinsic yet
|
||||
EIGEN_DEBUG_ALIGNED_LOAD
|
||||
Packet *vfrom;
|
||||
vfrom = (Packet *) from;
|
||||
return vfrom->v2d;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from)
|
||||
{
|
||||
// FIXME: No intrinsic yet
|
||||
EIGEN_DEBUG_ALIGNED_STORE
|
||||
Packet *vto;
|
||||
vto = (Packet *) to;
|
||||
vto->v4i = from;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from)
|
||||
{
|
||||
// FIXME: No intrinsic yet
|
||||
EIGEN_DEBUG_ALIGNED_STORE
|
||||
vec_st2f(from.v4f[0], &to[0]);
|
||||
vec_st2f(from.v4f[1], &to[2]);
|
||||
}
|
||||
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from)
|
||||
{
|
||||
// FIXME: No intrinsic yet
|
||||
EIGEN_DEBUG_ALIGNED_STORE
|
||||
Packet *vto;
|
||||
vto = (Packet *) to;
|
||||
vto->v2d = from;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from)
|
||||
{
|
||||
return vec_splats(from);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
|
||||
return vec_splats(from);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from)
|
||||
{
|
||||
Packet4f to;
|
||||
to.v4f[0] = pset1<Packet2d>(static_cast<const double&>(from));
|
||||
to.v4f[1] = to.v4f[0];
|
||||
return to;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void
|
||||
pbroadcast4<Packet4i>(const int *a,
|
||||
Packet4i& a0, Packet4i& a1, Packet4i& a2, Packet4i& a3)
|
||||
{
|
||||
a3 = pload<Packet4i>(a);
|
||||
a0 = vec_splat(a3, 0);
|
||||
a1 = vec_splat(a3, 1);
|
||||
a2 = vec_splat(a3, 2);
|
||||
a3 = vec_splat(a3, 3);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void
|
||||
pbroadcast4<Packet4f>(const float *a,
|
||||
Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
|
||||
{
|
||||
a3 = pload<Packet4f>(a);
|
||||
a0 = vec_splat_packet4f<0>(a3);
|
||||
a1 = vec_splat_packet4f<1>(a3);
|
||||
a2 = vec_splat_packet4f<2>(a3);
|
||||
a3 = vec_splat_packet4f<3>(a3);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void
|
||||
pbroadcast4<Packet2d>(const double *a,
|
||||
Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
|
||||
{
|
||||
a1 = pload<Packet2d>(a);
|
||||
a0 = vec_splat(a1, 0);
|
||||
a1 = vec_splat(a1, 1);
|
||||
a3 = pload<Packet2d>(a+2);
|
||||
a2 = vec_splat(a3, 0);
|
||||
a3 = vec_splat(a3, 1);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)
|
||||
{
|
||||
int EIGEN_ALIGN16 ai[4];
|
||||
ai[0] = from[0*stride];
|
||||
ai[1] = from[1*stride];
|
||||
ai[2] = from[2*stride];
|
||||
ai[3] = from[3*stride];
|
||||
return pload<Packet4i>(ai);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
|
||||
{
|
||||
float EIGEN_ALIGN16 ai[4];
|
||||
ai[0] = from[0*stride];
|
||||
ai[1] = from[1*stride];
|
||||
ai[2] = from[2*stride];
|
||||
ai[3] = from[3*stride];
|
||||
return pload<Packet4f>(ai);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
|
||||
{
|
||||
double EIGEN_ALIGN16 af[2];
|
||||
af[0] = from[0*stride];
|
||||
af[1] = from[1*stride];
|
||||
return pload<Packet2d>(af);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride)
|
||||
{
|
||||
int EIGEN_ALIGN16 ai[4];
|
||||
pstore<int>((int *)ai, from);
|
||||
to[0*stride] = ai[0];
|
||||
to[1*stride] = ai[1];
|
||||
to[2*stride] = ai[2];
|
||||
to[3*stride] = ai[3];
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
|
||||
{
|
||||
float EIGEN_ALIGN16 ai[4];
|
||||
pstore<float>((float *)ai, from);
|
||||
to[0*stride] = ai[0];
|
||||
to[1*stride] = ai[1];
|
||||
to[2*stride] = ai[2];
|
||||
to[3*stride] = ai[3];
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
|
||||
{
|
||||
double EIGEN_ALIGN16 af[2];
|
||||
pstore<double>(af, from);
|
||||
to[0*stride] = af[0];
|
||||
to[1*stride] = af[1];
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a + b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f c;
|
||||
c.v4f[0] = a.v4f[0] + b.v4f[0];
|
||||
c.v4f[1] = a.v4f[1] + b.v4f[1];
|
||||
return c;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a + b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a - b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f c;
|
||||
c.v4f[0] = a.v4f[0] - b.v4f[0];
|
||||
c.v4f[1] = a.v4f[1] - b.v4f[1];
|
||||
return c;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a - b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a * b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f c;
|
||||
c.v4f[0] = a.v4f[0] * b.v4f[0];
|
||||
c.v4f[1] = a.v4f[1] * b.v4f[1];
|
||||
return c;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a * b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a / b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f c;
|
||||
c.v4f[0] = a.v4f[0] / b.v4f[0];
|
||||
c.v4f[1] = a.v4f[1] / b.v4f[1];
|
||||
return c;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a / b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return (-a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)
|
||||
{
|
||||
Packet4f c;
|
||||
c.v4f[0] = -a.v4f[0];
|
||||
c.v4f[1] = -a.v4f[1];
|
||||
return c;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return (-a); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd<Packet4i>(pmul<Packet4i>(a, b), c); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = vec_madd(a.v4f[0], b.v4f[0], c.v4f[0]);
|
||||
res.v4f[1] = vec_madd(a.v4f[1], b.v4f[1], c.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return padd<Packet4i>(pset1<Packet4i>(a), p4i_COUNTDOWN); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return padd<Packet4f>(pset1<Packet4f>(a), p4f_COUNTDOWN); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return padd<Packet2d>(pset1<Packet2d>(a), p2d_COUNTDOWN); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pmin(a.v4f[0], b.v4f[0]);
|
||||
res.v4f[1] = pmin(a.v4f[1], b.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pmax(a.v4f[0], b.v4f[0]);
|
||||
res.v4f[1] = pmax(a.v4f[1], b.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pand(a.v4f[0], b.v4f[0]);
|
||||
res.v4f[1] = pand(a.v4f[1], b.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_or(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pand(a.v4f[0], b.v4f[0]);
|
||||
res.v4f[1] = pand(a.v4f[1], b.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_xor(a, b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pand(a.v4f[0], b.v4f[0]);
|
||||
res.v4f[1] = pand(a.v4f[1], b.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return pand<Packet4i>(a, vec_nor(b, b)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, vec_nor(b, b)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pandnot(a.v4f[0], b.v4f[0]);
|
||||
res.v4f[1] = pandnot(a.v4f[1], b.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = vec_round(a.v4f[0]);
|
||||
res.v4f[1] = vec_round(a.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return vec_round(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = vec_ceil(a.v4f[0]);
|
||||
res.v4f[1] = vec_ceil(a.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return vec_ceil(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = vec_floor(a.v4f[0]);
|
||||
res.v4f[1] = vec_floor(a.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return vec_floor(a); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { return pload<Packet4i>(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) { return pload<Packet4f>(from); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { return pload<Packet2d>(from); }
|
||||
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
|
||||
{
|
||||
Packet4i p = pload<Packet4i>(from);
|
||||
return vec_perm(p, p, p16uc_DUPLICATE32_HI);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
|
||||
{
|
||||
Packet4f p = pload<Packet4f>(from);
|
||||
p.v4f[1] = vec_splat(p.v4f[0], 1);
|
||||
p.v4f[0] = vec_splat(p.v4f[0], 0);
|
||||
return p;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
|
||||
{
|
||||
Packet2d p = pload<Packet2d>(from);
|
||||
return vec_perm(p, p, p16uc_PSET64_HI);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { pstore<int>(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { pstore<float>(to, from); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { pstore<double>(to, from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; pstore(x, a); return x[0]; }
|
||||
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[2]; vec_st2f(a.v4f[0], &x[0]); return x[0]; }
|
||||
template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore(x, a); return x[0]; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
|
||||
{
|
||||
return reinterpret_cast<Packet4i>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
|
||||
{
|
||||
return reinterpret_cast<Packet2d>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE64));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
|
||||
{
|
||||
Packet4f rev;
|
||||
rev.v4f[0] = preverse<Packet2d>(a.v4f[1]);
|
||||
rev.v4f[1] = preverse<Packet2d>(a.v4f[0]);
|
||||
return rev;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pabs<Packet4i>(const Packet4i& a) { return vec_abs(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pabs<Packet2d>(const Packet2d& a) { return vec_abs(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pabs<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f res;
|
||||
res.v4f[0] = pabs(a.v4f[0]);
|
||||
res.v4f[1] = pabs(a.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
Packet4i b, sum;
|
||||
b = vec_sld(a, a, 8);
|
||||
sum = padd<Packet4i>(a, b);
|
||||
b = vec_sld(sum, sum, 4);
|
||||
sum = padd<Packet4i>(sum, b);
|
||||
return pfirst(sum);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
|
||||
{
|
||||
Packet2d b, sum;
|
||||
b = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8));
|
||||
sum = padd<Packet2d>(a, b);
|
||||
return pfirst(sum);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet2d sum;
|
||||
sum = padd<Packet2d>(a.v4f[0], a.v4f[1]);
|
||||
double first = predux<Packet2d>(sum);
|
||||
return static_cast<float>(first);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
|
||||
{
|
||||
Packet4i v[4], sum[4];
|
||||
|
||||
// It's easier and faster to transpose then add as columns
|
||||
// Check: http://www.freevec.org/function/matrix_4x4_transpose_floats for explanation
|
||||
// Do the transpose, first set of moves
|
||||
v[0] = vec_mergeh(vecs[0], vecs[2]);
|
||||
v[1] = vec_mergel(vecs[0], vecs[2]);
|
||||
v[2] = vec_mergeh(vecs[1], vecs[3]);
|
||||
v[3] = vec_mergel(vecs[1], vecs[3]);
|
||||
// Get the resulting vectors
|
||||
sum[0] = vec_mergeh(v[0], v[2]);
|
||||
sum[1] = vec_mergel(v[0], v[2]);
|
||||
sum[2] = vec_mergeh(v[1], v[3]);
|
||||
sum[3] = vec_mergel(v[1], v[3]);
|
||||
|
||||
// Now do the summation:
|
||||
// Lines 0+1
|
||||
sum[0] = padd<Packet4i>(sum[0], sum[1]);
|
||||
// Lines 2+3
|
||||
sum[1] = padd<Packet4i>(sum[2], sum[3]);
|
||||
// Add the results
|
||||
sum[0] = padd<Packet4i>(sum[0], sum[1]);
|
||||
|
||||
return sum[0];
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
|
||||
{
|
||||
Packet2d v[2], sum;
|
||||
v[0] = padd<Packet2d>(vecs[0], reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(vecs[0]), reinterpret_cast<Packet4ui>(vecs[0]), 8)));
|
||||
v[1] = padd<Packet2d>(vecs[1], reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(vecs[1]), reinterpret_cast<Packet4ui>(vecs[1]), 8)));
|
||||
|
||||
sum = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(v[0]), reinterpret_cast<Packet4ui>(v[1]), 8));
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
|
||||
{
|
||||
PacketBlock<Packet4f,4> transpose;
|
||||
transpose.packet[0] = vecs[0];
|
||||
transpose.packet[1] = vecs[1];
|
||||
transpose.packet[2] = vecs[2];
|
||||
transpose.packet[3] = vecs[3];
|
||||
ptranspose(transpose);
|
||||
|
||||
Packet4f sum = padd(transpose.packet[0], transpose.packet[1]);
|
||||
sum = padd(sum, transpose.packet[2]);
|
||||
sum = padd(sum, transpose.packet[3]);
|
||||
return sum;
|
||||
}
|
||||
|
||||
// Other reduction functions:
|
||||
// mul
|
||||
template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
EIGEN_ALIGN16 int aux[4];
|
||||
pstore(aux, a);
|
||||
return aux[0] * aux[1] * aux[2] * aux[3];
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
|
||||
{
|
||||
return pfirst(pmul(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
// Return predux_mul<Packet2d> of the subvectors product
|
||||
return static_cast<float>(pfirst(predux_mul(pmul(a.v4f[0], a.v4f[1]))));
|
||||
}
|
||||
|
||||
// min
|
||||
template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
Packet4i b, res;
|
||||
b = pmin<Packet4i>(a, vec_sld(a, a, 8));
|
||||
res = pmin<Packet4i>(b, vec_sld(b, b, 4));
|
||||
return pfirst(res);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
|
||||
{
|
||||
return pfirst(pmin<Packet2d>(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet2d b, res;
|
||||
b = pmin<Packet2d>(a.v4f[0], a.v4f[1]);
|
||||
res = pmin<Packet2d>(b, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(b), reinterpret_cast<Packet4i>(b), 8)));
|
||||
return static_cast<float>(pfirst(res));
|
||||
}
|
||||
|
||||
// max
|
||||
template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
Packet4i b, res;
|
||||
b = pmax<Packet4i>(a, vec_sld(a, a, 8));
|
||||
res = pmax<Packet4i>(b, vec_sld(b, b, 4));
|
||||
return pfirst(res);
|
||||
}
|
||||
|
||||
// max
|
||||
template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
|
||||
{
|
||||
return pfirst(pmax<Packet2d>(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet2d b, res;
|
||||
b = pmax<Packet2d>(a.v4f[0], a.v4f[1]);
|
||||
res = pmax<Packet2d>(b, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(b), reinterpret_cast<Packet4i>(b), 8)));
|
||||
return static_cast<float>(pfirst(res));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet4i,4>& kernel) {
|
||||
Packet4i t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
|
||||
Packet4i t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
|
||||
Packet4i t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
|
||||
Packet4i t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);
|
||||
kernel.packet[0] = vec_mergeh(t0, t2);
|
||||
kernel.packet[1] = vec_mergel(t0, t2);
|
||||
kernel.packet[2] = vec_mergeh(t1, t3);
|
||||
kernel.packet[3] = vec_mergel(t1, t3);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet2d,2>& kernel) {
|
||||
Packet2d t0 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_HI);
|
||||
Packet2d t1 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_LO);
|
||||
kernel.packet[0] = t0;
|
||||
kernel.packet[1] = t1;
|
||||
}
|
||||
|
||||
/* Split the Packet4f PacketBlock into 4 Packet2d PacketBlocks and transpose each one
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<Packet4f,4>& kernel) {
|
||||
PacketBlock<Packet2d,2> t0,t1,t2,t3;
|
||||
// copy top-left 2x2 Packet2d block
|
||||
t0.packet[0] = kernel.packet[0].v4f[0];
|
||||
t0.packet[1] = kernel.packet[1].v4f[0];
|
||||
|
||||
// copy top-right 2x2 Packet2d block
|
||||
t1.packet[0] = kernel.packet[0].v4f[1];
|
||||
t1.packet[1] = kernel.packet[1].v4f[1];
|
||||
|
||||
// copy bottom-left 2x2 Packet2d block
|
||||
t2.packet[0] = kernel.packet[2].v4f[0];
|
||||
t2.packet[1] = kernel.packet[3].v4f[0];
|
||||
|
||||
// copy bottom-right 2x2 Packet2d block
|
||||
t3.packet[0] = kernel.packet[2].v4f[1];
|
||||
t3.packet[1] = kernel.packet[3].v4f[1];
|
||||
|
||||
// Transpose all 2x2 blocks
|
||||
ptranspose(t0);
|
||||
ptranspose(t1);
|
||||
ptranspose(t2);
|
||||
ptranspose(t3);
|
||||
|
||||
// Copy back transposed blocks, but exchange t1 and t2 due to transposition
|
||||
kernel.packet[0].v4f[0] = t0.packet[0];
|
||||
kernel.packet[0].v4f[1] = t2.packet[0];
|
||||
kernel.packet[1].v4f[0] = t0.packet[1];
|
||||
kernel.packet[1].v4f[1] = t2.packet[1];
|
||||
kernel.packet[2].v4f[0] = t1.packet[0];
|
||||
kernel.packet[2].v4f[1] = t3.packet[0];
|
||||
kernel.packet[3].v4f[0] = t1.packet[1];
|
||||
kernel.packet[3].v4f[1] = t3.packet[1];
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
|
||||
Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
|
||||
Packet4ui mask = vec_cmpeq(select, reinterpret_cast<Packet4ui>(p4i_ONE));
|
||||
return vec_sel(elsePacket, thenPacket, mask);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
|
||||
Packet2ul select_hi = { ifPacket.select[0], ifPacket.select[1] };
|
||||
Packet2ul select_lo = { ifPacket.select[2], ifPacket.select[3] };
|
||||
Packet2ul mask_hi = vec_cmpeq(select_hi, reinterpret_cast<Packet2ul>(p2l_ONE));
|
||||
Packet2ul mask_lo = vec_cmpeq(select_lo, reinterpret_cast<Packet2ul>(p2l_ONE));
|
||||
Packet4f result;
|
||||
result.v4f[0] = vec_sel(elsePacket.v4f[0], thenPacket.v4f[0], mask_hi);
|
||||
result.v4f[1] = vec_sel(elsePacket.v4f[1], thenPacket.v4f[1], mask_lo);
|
||||
return result;
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
|
||||
Packet2ul select = { ifPacket.select[0], ifPacket.select[1] };
|
||||
Packet2ul mask = vec_cmpeq(select, reinterpret_cast<Packet2ul>(p2l_ONE));
|
||||
return vec_sel(elsePacket, thenPacket, mask);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_PACKET_MATH_ZVECTOR_H
|
||||
168
external/include/eigen3/Eigen/src/Core/functors/AssignmentFunctors.h
vendored
Normal file
168
external/include/eigen3/Eigen/src/Core/functors/AssignmentFunctors.h
vendored
Normal file
@@ -0,0 +1,168 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_ASSIGNMENT_FUNCTORS_H
|
||||
#define EIGEN_ASSIGNMENT_FUNCTORS_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor for scalar/packet assignment
|
||||
*
|
||||
*/
|
||||
template<typename DstScalar,typename SrcScalar> struct assign_op {
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(assign_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a = b; }
|
||||
|
||||
template<int Alignment, typename Packet>
|
||||
EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
|
||||
{ internal::pstoret<DstScalar,Packet,Alignment>(a,b); }
|
||||
};
|
||||
|
||||
// Empty overload for void type (used by PermutationMatrix)
|
||||
template<typename DstScalar> struct assign_op<DstScalar,void> {};
|
||||
|
||||
template<typename DstScalar,typename SrcScalar>
|
||||
struct functor_traits<assign_op<DstScalar,SrcScalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<DstScalar>::ReadCost,
|
||||
PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::Vectorizable && packet_traits<SrcScalar>::Vectorizable
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor for scalar/packet assignment with addition
|
||||
*
|
||||
*/
|
||||
template<typename DstScalar,typename SrcScalar> struct add_assign_op {
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(add_assign_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a += b; }
|
||||
|
||||
template<int Alignment, typename Packet>
|
||||
EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
|
||||
{ internal::pstoret<DstScalar,Packet,Alignment>(a,internal::padd(internal::ploadt<Packet,Alignment>(a),b)); }
|
||||
};
|
||||
template<typename DstScalar,typename SrcScalar>
|
||||
struct functor_traits<add_assign_op<DstScalar,SrcScalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::AddCost,
|
||||
PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasAdd
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor for scalar/packet assignment with subtraction
|
||||
*
|
||||
*/
|
||||
template<typename DstScalar,typename SrcScalar> struct sub_assign_op {
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(sub_assign_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a -= b; }
|
||||
|
||||
template<int Alignment, typename Packet>
|
||||
EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
|
||||
{ internal::pstoret<DstScalar,Packet,Alignment>(a,internal::psub(internal::ploadt<Packet,Alignment>(a),b)); }
|
||||
};
|
||||
template<typename DstScalar,typename SrcScalar>
|
||||
struct functor_traits<sub_assign_op<DstScalar,SrcScalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::AddCost,
|
||||
PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasSub
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor for scalar/packet assignment with multiplication
|
||||
*
|
||||
*/
|
||||
template<typename DstScalar, typename SrcScalar=DstScalar>
|
||||
struct mul_assign_op {
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(mul_assign_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a *= b; }
|
||||
|
||||
template<int Alignment, typename Packet>
|
||||
EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
|
||||
{ internal::pstoret<DstScalar,Packet,Alignment>(a,internal::pmul(internal::ploadt<Packet,Alignment>(a),b)); }
|
||||
};
|
||||
template<typename DstScalar, typename SrcScalar>
|
||||
struct functor_traits<mul_assign_op<DstScalar,SrcScalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::MulCost,
|
||||
PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasMul
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor for scalar/packet assignment with diviving
|
||||
*
|
||||
*/
|
||||
template<typename DstScalar, typename SrcScalar=DstScalar> struct div_assign_op {
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(div_assign_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a /= b; }
|
||||
|
||||
template<int Alignment, typename Packet>
|
||||
EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
|
||||
{ internal::pstoret<DstScalar,Packet,Alignment>(a,internal::pdiv(internal::ploadt<Packet,Alignment>(a),b)); }
|
||||
};
|
||||
template<typename DstScalar, typename SrcScalar>
|
||||
struct functor_traits<div_assign_op<DstScalar,SrcScalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::MulCost,
|
||||
PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasDiv
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor for scalar/packet assignment with swapping
|
||||
*
|
||||
* It works as follow. For a non-vectorized evaluation loop, we have:
|
||||
* for(i) func(A.coeffRef(i), B.coeff(i));
|
||||
* where B is a SwapWrapper expression. The trick is to make SwapWrapper::coeff behaves like a non-const coeffRef.
|
||||
* Actually, SwapWrapper might not even be needed since even if B is a plain expression, since it has to be writable
|
||||
* B.coeff already returns a const reference to the underlying scalar value.
|
||||
*
|
||||
* The case of a vectorized loop is more tricky:
|
||||
* for(i,j) func.assignPacket<A_Align>(&A.coeffRef(i,j), B.packet<B_Align>(i,j));
|
||||
* Here, B must be a SwapWrapper whose packet function actually returns a proxy object holding a Scalar*,
|
||||
* the actual alignment and Packet type.
|
||||
*
|
||||
*/
|
||||
template<typename Scalar> struct swap_assign_op {
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(swap_assign_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const
|
||||
{
|
||||
#ifdef __CUDACC__
|
||||
// FIXME is there some kind of cuda::swap?
|
||||
Scalar t=b; const_cast<Scalar&>(b)=a; a=t;
|
||||
#else
|
||||
using std::swap;
|
||||
swap(a,const_cast<Scalar&>(b));
|
||||
#endif
|
||||
}
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<swap_assign_op<Scalar> > {
|
||||
enum {
|
||||
Cost = 3 * NumTraits<Scalar>::ReadCost,
|
||||
PacketAccess = packet_traits<Scalar>::Vectorizable
|
||||
};
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
|
||||
} // namespace Eigen
|
||||
|
||||
#endif // EIGEN_ASSIGNMENT_FUNCTORS_H
|
||||
475
external/include/eigen3/Eigen/src/Core/functors/BinaryFunctors.h
vendored
Normal file
475
external/include/eigen3/Eigen/src/Core/functors/BinaryFunctors.h
vendored
Normal file
@@ -0,0 +1,475 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_BINARY_FUNCTORS_H
|
||||
#define EIGEN_BINARY_FUNCTORS_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
//---------- associative binary functors ----------
|
||||
|
||||
template<typename Arg1, typename Arg2>
|
||||
struct binary_op_base
|
||||
{
|
||||
typedef Arg1 first_argument_type;
|
||||
typedef Arg2 second_argument_type;
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the sum of two scalars
|
||||
*
|
||||
* \sa class CwiseBinaryOp, MatrixBase::operator+, class VectorwiseOp, DenseBase::sum()
|
||||
*/
|
||||
template<typename LhsScalar,typename RhsScalar>
|
||||
struct scalar_sum_op : binary_op_base<LhsScalar,RhsScalar>
|
||||
{
|
||||
typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_sum_op>::ReturnType result_type;
|
||||
#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op)
|
||||
#else
|
||||
scalar_sum_op() {
|
||||
EIGEN_SCALAR_BINARY_OP_PLUGIN
|
||||
}
|
||||
#endif
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a + b; }
|
||||
template<typename Packet>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return internal::padd(a,b); }
|
||||
template<typename Packet>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
|
||||
{ return internal::predux(a); }
|
||||
};
|
||||
template<typename LhsScalar,typename RhsScalar>
|
||||
struct functor_traits<scalar_sum_op<LhsScalar,RhsScalar> > {
|
||||
enum {
|
||||
Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2, // rough estimate!
|
||||
PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasAdd && packet_traits<RhsScalar>::HasAdd
|
||||
// TODO vectorize mixed sum
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template specialization to deprecate the summation of boolean expressions.
|
||||
* This is required to solve Bug 426.
|
||||
* \sa DenseBase::count(), DenseBase::any(), ArrayBase::cast(), MatrixBase::cast()
|
||||
*/
|
||||
template<> struct scalar_sum_op<bool,bool> : scalar_sum_op<int,int> {
|
||||
EIGEN_DEPRECATED
|
||||
scalar_sum_op() {}
|
||||
};
|
||||
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the product of two scalars
|
||||
*
|
||||
* \sa class CwiseBinaryOp, Cwise::operator*(), class VectorwiseOp, MatrixBase::redux()
|
||||
*/
|
||||
template<typename LhsScalar,typename RhsScalar>
|
||||
struct scalar_product_op : binary_op_base<LhsScalar,RhsScalar>
|
||||
{
|
||||
typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_product_op>::ReturnType result_type;
|
||||
#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op)
|
||||
#else
|
||||
scalar_product_op() {
|
||||
EIGEN_SCALAR_BINARY_OP_PLUGIN
|
||||
}
|
||||
#endif
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; }
|
||||
template<typename Packet>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return internal::pmul(a,b); }
|
||||
template<typename Packet>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
|
||||
{ return internal::predux_mul(a); }
|
||||
};
|
||||
template<typename LhsScalar,typename RhsScalar>
|
||||
struct functor_traits<scalar_product_op<LhsScalar,RhsScalar> > {
|
||||
enum {
|
||||
Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost)/2, // rough estimate!
|
||||
PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasMul && packet_traits<RhsScalar>::HasMul
|
||||
// TODO vectorize mixed product
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the conjugate product of two scalars
|
||||
*
|
||||
* This is a short cut for conj(x) * y which is needed for optimization purpose; in Eigen2 support mode, this becomes x * conj(y)
|
||||
*/
|
||||
template<typename LhsScalar,typename RhsScalar>
|
||||
struct scalar_conj_product_op : binary_op_base<LhsScalar,RhsScalar>
|
||||
{
|
||||
|
||||
enum {
|
||||
Conj = NumTraits<LhsScalar>::IsComplex
|
||||
};
|
||||
|
||||
typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_conj_product_op>::ReturnType result_type;
|
||||
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_conj_product_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const
|
||||
{ return conj_helper<LhsScalar,RhsScalar,Conj,false>().pmul(a,b); }
|
||||
|
||||
template<typename Packet>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return conj_helper<Packet,Packet,Conj,false>().pmul(a,b); }
|
||||
};
|
||||
template<typename LhsScalar,typename RhsScalar>
|
||||
struct functor_traits<scalar_conj_product_op<LhsScalar,RhsScalar> > {
|
||||
enum {
|
||||
Cost = NumTraits<LhsScalar>::MulCost,
|
||||
PacketAccess = internal::is_same<LhsScalar, RhsScalar>::value && packet_traits<LhsScalar>::HasMul
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the min of two scalars
|
||||
*
|
||||
* \sa class CwiseBinaryOp, MatrixBase::cwiseMin, class VectorwiseOp, MatrixBase::minCoeff()
|
||||
*/
|
||||
template<typename LhsScalar,typename RhsScalar>
|
||||
struct scalar_min_op : binary_op_base<LhsScalar,RhsScalar>
|
||||
{
|
||||
typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_min_op>::ReturnType result_type;
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return numext::mini(a, b); }
|
||||
template<typename Packet>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return internal::pmin(a,b); }
|
||||
template<typename Packet>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
|
||||
{ return internal::predux_min(a); }
|
||||
};
|
||||
template<typename LhsScalar,typename RhsScalar>
|
||||
struct functor_traits<scalar_min_op<LhsScalar,RhsScalar> > {
|
||||
enum {
|
||||
Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
|
||||
PacketAccess = internal::is_same<LhsScalar, RhsScalar>::value && packet_traits<LhsScalar>::HasMin
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the max of two scalars
|
||||
*
|
||||
* \sa class CwiseBinaryOp, MatrixBase::cwiseMax, class VectorwiseOp, MatrixBase::maxCoeff()
|
||||
*/
|
||||
template<typename LhsScalar,typename RhsScalar>
|
||||
struct scalar_max_op : binary_op_base<LhsScalar,RhsScalar>
|
||||
{
|
||||
typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_max_op>::ReturnType result_type;
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return numext::maxi(a, b); }
|
||||
template<typename Packet>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return internal::pmax(a,b); }
|
||||
template<typename Packet>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
|
||||
{ return internal::predux_max(a); }
|
||||
};
|
||||
template<typename LhsScalar,typename RhsScalar>
|
||||
struct functor_traits<scalar_max_op<LhsScalar,RhsScalar> > {
|
||||
enum {
|
||||
Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
|
||||
PacketAccess = internal::is_same<LhsScalar, RhsScalar>::value && packet_traits<LhsScalar>::HasMax
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functors for comparison of two scalars
|
||||
* \todo Implement packet-comparisons
|
||||
*/
|
||||
template<typename LhsScalar, typename RhsScalar, ComparisonName cmp> struct scalar_cmp_op;
|
||||
|
||||
template<typename LhsScalar, typename RhsScalar, ComparisonName cmp>
|
||||
struct functor_traits<scalar_cmp_op<LhsScalar,RhsScalar, cmp> > {
|
||||
enum {
|
||||
Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
|
||||
PacketAccess = false
|
||||
};
|
||||
};
|
||||
|
||||
template<ComparisonName Cmp, typename LhsScalar, typename RhsScalar>
|
||||
struct result_of<scalar_cmp_op<LhsScalar, RhsScalar, Cmp>(LhsScalar,RhsScalar)> {
|
||||
typedef bool type;
|
||||
};
|
||||
|
||||
|
||||
template<typename LhsScalar, typename RhsScalar>
|
||||
struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_EQ> : binary_op_base<LhsScalar,RhsScalar>
|
||||
{
|
||||
typedef bool result_type;
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a==b;}
|
||||
};
|
||||
template<typename LhsScalar, typename RhsScalar>
|
||||
struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_LT> : binary_op_base<LhsScalar,RhsScalar>
|
||||
{
|
||||
typedef bool result_type;
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a<b;}
|
||||
};
|
||||
template<typename LhsScalar, typename RhsScalar>
|
||||
struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_LE> : binary_op_base<LhsScalar,RhsScalar>
|
||||
{
|
||||
typedef bool result_type;
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a<=b;}
|
||||
};
|
||||
template<typename LhsScalar, typename RhsScalar>
|
||||
struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_GT> : binary_op_base<LhsScalar,RhsScalar>
|
||||
{
|
||||
typedef bool result_type;
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>b;}
|
||||
};
|
||||
template<typename LhsScalar, typename RhsScalar>
|
||||
struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_GE> : binary_op_base<LhsScalar,RhsScalar>
|
||||
{
|
||||
typedef bool result_type;
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>=b;}
|
||||
};
|
||||
template<typename LhsScalar, typename RhsScalar>
|
||||
struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_UNORD> : binary_op_base<LhsScalar,RhsScalar>
|
||||
{
|
||||
typedef bool result_type;
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return !(a<=b || b<=a);}
|
||||
};
|
||||
template<typename LhsScalar, typename RhsScalar>
|
||||
struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_NEQ> : binary_op_base<LhsScalar,RhsScalar>
|
||||
{
|
||||
typedef bool result_type;
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a!=b;}
|
||||
};
|
||||
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the hypot of two \b positive \b and \b real scalars
|
||||
*
|
||||
* \sa MatrixBase::stableNorm(), class Redux
|
||||
*/
|
||||
template<typename Scalar>
|
||||
struct scalar_hypot_op<Scalar,Scalar> : binary_op_base<Scalar,Scalar>
|
||||
{
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op)
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar &x, const Scalar &y) const
|
||||
{
|
||||
// This functor is used by hypotNorm only for which it is faster to first apply abs
|
||||
// on all coefficients prior to reduction through hypot.
|
||||
// This way we avoid calling abs on positive and real entries, and this also permits
|
||||
// to seamlessly handle complexes. Otherwise we would have to handle both real and complexes
|
||||
// through the same functor...
|
||||
return internal::positive_real_hypot(x,y);
|
||||
}
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_hypot_op<Scalar,Scalar> > {
|
||||
enum
|
||||
{
|
||||
Cost = 3 * NumTraits<Scalar>::AddCost +
|
||||
2 * NumTraits<Scalar>::MulCost +
|
||||
2 * scalar_div_cost<Scalar,false>::value,
|
||||
PacketAccess = false
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the pow of two scalars
|
||||
*/
|
||||
template<typename Scalar, typename Exponent>
|
||||
struct scalar_pow_op : binary_op_base<Scalar,Exponent>
|
||||
{
|
||||
typedef typename ScalarBinaryOpTraits<Scalar,Exponent,scalar_pow_op>::ReturnType result_type;
|
||||
#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_pow_op)
|
||||
#else
|
||||
scalar_pow_op() {
|
||||
typedef Scalar LhsScalar;
|
||||
typedef Exponent RhsScalar;
|
||||
EIGEN_SCALAR_BINARY_OP_PLUGIN
|
||||
}
|
||||
#endif
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline result_type operator() (const Scalar& a, const Exponent& b) const { return numext::pow(a, b); }
|
||||
};
|
||||
template<typename Scalar, typename Exponent>
|
||||
struct functor_traits<scalar_pow_op<Scalar,Exponent> > {
|
||||
enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false };
|
||||
};
|
||||
|
||||
|
||||
|
||||
//---------- non associative binary functors ----------
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the difference of two scalars
|
||||
*
|
||||
* \sa class CwiseBinaryOp, MatrixBase::operator-
|
||||
*/
|
||||
template<typename LhsScalar,typename RhsScalar>
|
||||
struct scalar_difference_op : binary_op_base<LhsScalar,RhsScalar>
|
||||
{
|
||||
typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_difference_op>::ReturnType result_type;
|
||||
#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op)
|
||||
#else
|
||||
scalar_difference_op() {
|
||||
EIGEN_SCALAR_BINARY_OP_PLUGIN
|
||||
}
|
||||
#endif
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a - b; }
|
||||
template<typename Packet>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return internal::psub(a,b); }
|
||||
};
|
||||
template<typename LhsScalar,typename RhsScalar>
|
||||
struct functor_traits<scalar_difference_op<LhsScalar,RhsScalar> > {
|
||||
enum {
|
||||
Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
|
||||
PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasSub && packet_traits<RhsScalar>::HasSub
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the quotient of two scalars
|
||||
*
|
||||
* \sa class CwiseBinaryOp, Cwise::operator/()
|
||||
*/
|
||||
template<typename LhsScalar,typename RhsScalar>
|
||||
struct scalar_quotient_op : binary_op_base<LhsScalar,RhsScalar>
|
||||
{
|
||||
typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_quotient_op>::ReturnType result_type;
|
||||
#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op)
|
||||
#else
|
||||
scalar_quotient_op() {
|
||||
EIGEN_SCALAR_BINARY_OP_PLUGIN
|
||||
}
|
||||
#endif
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; }
|
||||
template<typename Packet>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
|
||||
{ return internal::pdiv(a,b); }
|
||||
};
|
||||
template<typename LhsScalar,typename RhsScalar>
|
||||
struct functor_traits<scalar_quotient_op<LhsScalar,RhsScalar> > {
|
||||
typedef typename scalar_quotient_op<LhsScalar,RhsScalar>::result_type result_type;
|
||||
enum {
|
||||
PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasDiv && packet_traits<RhsScalar>::HasDiv,
|
||||
Cost = scalar_div_cost<result_type,PacketAccess>::value
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the and of two booleans
|
||||
*
|
||||
* \sa class CwiseBinaryOp, ArrayBase::operator&&
|
||||
*/
|
||||
struct scalar_boolean_and_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_and_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a && b; }
|
||||
};
|
||||
template<> struct functor_traits<scalar_boolean_and_op> {
|
||||
enum {
|
||||
Cost = NumTraits<bool>::AddCost,
|
||||
PacketAccess = false
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the or of two booleans
|
||||
*
|
||||
* \sa class CwiseBinaryOp, ArrayBase::operator||
|
||||
*/
|
||||
struct scalar_boolean_or_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_or_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a || b; }
|
||||
};
|
||||
template<> struct functor_traits<scalar_boolean_or_op> {
|
||||
enum {
|
||||
Cost = NumTraits<bool>::AddCost,
|
||||
PacketAccess = false
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the xor of two booleans
|
||||
*
|
||||
* \sa class CwiseBinaryOp, ArrayBase::operator^
|
||||
*/
|
||||
struct scalar_boolean_xor_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_xor_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a ^ b; }
|
||||
};
|
||||
template<> struct functor_traits<scalar_boolean_xor_op> {
|
||||
enum {
|
||||
Cost = NumTraits<bool>::AddCost,
|
||||
PacketAccess = false
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
|
||||
//---------- binary functors bound to a constant, thus appearing as a unary functor ----------
|
||||
|
||||
// The following two classes permits to turn any binary functor into a unary one with one argument bound to a constant value.
|
||||
// They are analogues to std::binder1st/binder2nd but with the following differences:
|
||||
// - they are compatible with packetOp
|
||||
// - they are portable across C++ versions (the std::binder* are deprecated in C++11)
|
||||
template<typename BinaryOp> struct bind1st_op : BinaryOp {
|
||||
|
||||
typedef typename BinaryOp::first_argument_type first_argument_type;
|
||||
typedef typename BinaryOp::second_argument_type second_argument_type;
|
||||
typedef typename BinaryOp::result_type result_type;
|
||||
|
||||
bind1st_op(const first_argument_type &val) : m_value(val) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const second_argument_type& b) const { return BinaryOp::operator()(m_value,b); }
|
||||
|
||||
template<typename Packet>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& b) const
|
||||
{ return BinaryOp::packetOp(internal::pset1<Packet>(m_value), b); }
|
||||
|
||||
first_argument_type m_value;
|
||||
};
|
||||
template<typename BinaryOp> struct functor_traits<bind1st_op<BinaryOp> > : functor_traits<BinaryOp> {};
|
||||
|
||||
|
||||
template<typename BinaryOp> struct bind2nd_op : BinaryOp {
|
||||
|
||||
typedef typename BinaryOp::first_argument_type first_argument_type;
|
||||
typedef typename BinaryOp::second_argument_type second_argument_type;
|
||||
typedef typename BinaryOp::result_type result_type;
|
||||
|
||||
bind2nd_op(const second_argument_type &val) : m_value(val) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const first_argument_type& a) const { return BinaryOp::operator()(a,m_value); }
|
||||
|
||||
template<typename Packet>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
|
||||
{ return BinaryOp::packetOp(a,internal::pset1<Packet>(m_value)); }
|
||||
|
||||
second_argument_type m_value;
|
||||
};
|
||||
template<typename BinaryOp> struct functor_traits<bind2nd_op<BinaryOp> > : functor_traits<BinaryOp> {};
|
||||
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_BINARY_FUNCTORS_H
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user