add Eigen as a dependency

external/include/eigen3/unsupported/Eigen/AdolcForward (new vendored file, 156 lines)
| @@ -0,0 +1,156 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_ADLOC_FORWARD | ||||
| #define EIGEN_ADLOC_FORWARD | ||||
|  | ||||
| //-------------------------------------------------------------------------------- | ||||
| // | ||||
| // This file provides support for adolc's adouble type in forward mode. | ||||
| // ADOL-C is a C++ automatic differentiation library, | ||||
| // see https://projects.coin-or.org/ADOL-C for more information. | ||||
| // | ||||
| // Note that the maximal number of directions is controlled by | ||||
| // the preprocessor token NUMBER_DIRECTIONS. The default is 2. | ||||
| // | ||||
| //-------------------------------------------------------------------------------- | ||||
|  | ||||
| #define ADOLC_TAPELESS | ||||
| #ifndef NUMBER_DIRECTIONS | ||||
| # define NUMBER_DIRECTIONS 2 | ||||
| #endif | ||||
| #include <adolc/adtl.h> | ||||
|  | ||||
| // adolc defines some very stupid macros: | ||||
| #if defined(malloc) | ||||
| # undef malloc | ||||
| #endif | ||||
|  | ||||
| #if defined(calloc) | ||||
| # undef calloc | ||||
| #endif | ||||
|  | ||||
| #if defined(realloc) | ||||
| # undef realloc | ||||
| #endif | ||||
|  | ||||
| #include <Eigen/Core> | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** | ||||
|   * \defgroup AdolcForward_Module Adolc forward module | ||||
|   * This module provides support for adolc's adouble type in forward mode. | ||||
|   * ADOL-C is a C++ automatic differentiation library, | ||||
|   * see https://projects.coin-or.org/ADOL-C for more information. | ||||
|   * It mainly consists of: | ||||
|   *  - a struct Eigen::NumTraits<adtl::adouble> specialization | ||||
|   *  - overloads of the internal::* math functions for the adtl::adouble type. | ||||
|   * | ||||
|   * Note that the maximal number of directions is controlled by | ||||
|   * the preprocessor token NUMBER_DIRECTIONS. The default is 2. | ||||
|   * | ||||
|   * \code | ||||
|   * #include <unsupported/Eigen/AdolcForward> | ||||
|   * \endcode | ||||
|   */ | ||||
|   //@{ | ||||
|  | ||||
| } // namespace Eigen | ||||
|  | ||||
| // Eigen requires a few additional functions which must be defined in the same namespace | ||||
| // as the custom scalar type's own namespace | ||||
| namespace adtl { | ||||
|  | ||||
| inline const adouble& conj(const adouble& x)  { return x; } | ||||
| inline const adouble& real(const adouble& x)  { return x; } | ||||
| inline adouble imag(const adouble&)    { return 0.; } | ||||
| inline adouble abs(const adouble&  x)  { return fabs(x); } | ||||
| inline adouble abs2(const adouble& x)  { return x*x; } | ||||
|  | ||||
| } | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| template<> struct NumTraits<adtl::adouble> | ||||
|     : NumTraits<double> | ||||
| { | ||||
|   typedef adtl::adouble Real; | ||||
|   typedef adtl::adouble NonInteger; | ||||
|   typedef adtl::adouble Nested; | ||||
|   enum { | ||||
|     IsComplex = 0, | ||||
|     IsInteger = 0, | ||||
|     IsSigned = 1, | ||||
|     RequireInitialization = 1, | ||||
|     ReadCost = 1, | ||||
|     AddCost = 1, | ||||
|     MulCost = 1 | ||||
|   }; | ||||
| }; | ||||
|  | ||||
| template<typename Functor> class AdolcForwardJacobian : public Functor | ||||
| { | ||||
|   typedef adtl::adouble ActiveScalar; | ||||
| public: | ||||
|  | ||||
|   AdolcForwardJacobian() : Functor() {} | ||||
|   AdolcForwardJacobian(const Functor& f) : Functor(f) {} | ||||
|  | ||||
|   // forward constructors | ||||
|   template<typename T0> | ||||
|   AdolcForwardJacobian(const T0& a0) : Functor(a0) {} | ||||
|   template<typename T0, typename T1> | ||||
|   AdolcForwardJacobian(const T0& a0, const T1& a1) : Functor(a0, a1) {} | ||||
|   template<typename T0, typename T1, typename T2> | ||||
|   AdolcForwardJacobian(const T0& a0, const T1& a1, const T2& a2) : Functor(a0, a1, a2) {} | ||||
|  | ||||
|   typedef typename Functor::InputType InputType; | ||||
|   typedef typename Functor::ValueType ValueType; | ||||
|   typedef typename Functor::JacobianType JacobianType; | ||||
|  | ||||
|   typedef Matrix<ActiveScalar, InputType::SizeAtCompileTime, 1> ActiveInput; | ||||
|   typedef Matrix<ActiveScalar, ValueType::SizeAtCompileTime, 1> ActiveValue; | ||||
|  | ||||
|   void operator() (const InputType& x, ValueType* v, JacobianType* _jac) const | ||||
|   { | ||||
|     eigen_assert(v!=0); | ||||
|     if (!_jac) | ||||
|     { | ||||
|       Functor::operator()(x, v); | ||||
|       return; | ||||
|     } | ||||
|  | ||||
|     JacobianType& jac = *_jac; | ||||
|  | ||||
|     ActiveInput ax = x.template cast<ActiveScalar>(); | ||||
|     ActiveValue av(jac.rows()); | ||||
|  | ||||
|     for (int j=0; j<jac.cols(); j++) | ||||
|       for (int i=0; i<jac.cols(); i++) | ||||
|         ax[i].setADValue(j, i==j ? 1 : 0); | ||||
|  | ||||
|     Functor::operator()(ax, &av); | ||||
|  | ||||
|     for (int i=0; i<jac.rows(); i++) | ||||
|     { | ||||
|       (*v)[i] = av[i].getValue(); | ||||
|       for (int j=0; j<jac.cols(); j++) | ||||
|         jac.coeffRef(i,j) = av[i].getADValue(j); | ||||
|     } | ||||
|   } | ||||
| protected: | ||||
|  | ||||
| }; | ||||
|  | ||||
| //@} | ||||
|  | ||||
| } | ||||
|  | ||||
| #endif // EIGEN_ADLOC_FORWARD | ||||
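A minimal usage sketch for the wrapper above, assuming the ADOL-C headers are installed; ExampleFunctor and its 2x2 dimensions are made up purely to illustrate the InputType/ValueType/JacobianType contract and the templated operator() that AdolcForwardJacobian relies on:

#include <unsupported/Eigen/AdolcForward>
#include <iostream>

// Hypothetical functor: f(x) = (x0*x1, x0 + x1).
struct ExampleFunctor
{
  typedef Eigen::Vector2d InputType;
  typedef Eigen::Vector2d ValueType;
  typedef Eigen::Matrix2d JacobianType;

  // Must be callable with double as well as adtl::adouble coefficients.
  template<typename T>
  void operator()(const Eigen::Matrix<T,2,1>& x, Eigen::Matrix<T,2,1>* v) const
  {
    (*v)[0] = x[0] * x[1];
    (*v)[1] = x[0] + x[1];
  }
};

int main()
{
  Eigen::AdolcForwardJacobian<ExampleFunctor> f;
  Eigen::Vector2d x(1.0, 2.0), y;
  Eigen::Matrix2d jac;
  f(x, &y, &jac);            // y = f(x), jac = df/dx at x
  std::cout << jac << "\n";  // expected: [[2, 1], [1, 1]]
}

Since the default NUMBER_DIRECTIONS is 2, the two-dimensional input here fits without redefining it.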
							
								
								
									
external/include/eigen3/unsupported/Eigen/AlignedVector3 (new vendored file, 224 lines)
| @@ -0,0 +1,224 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_ALIGNED_VECTOR3 | ||||
| #define EIGEN_ALIGNED_VECTOR3 | ||||
|  | ||||
| #include <Eigen/Geometry> | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** | ||||
|   * \defgroup AlignedVector3_Module Aligned vector3 module | ||||
|   * | ||||
|   * \code | ||||
|   * #include <unsupported/Eigen/AlignedVector3> | ||||
|   * \endcode | ||||
|   */ | ||||
|   //@{ | ||||
|  | ||||
|  | ||||
| /** \class AlignedVector3 | ||||
|   * | ||||
|   * \brief A vectorization friendly 3D vector | ||||
|   * | ||||
|   * This class represents a 3D vector internally using a 4D vector | ||||
|   * such that vectorization can be seamlessly enabled. Of course, | ||||
|   * the same result can be achieved by directly using a 4D vector. | ||||
|   * This class makes this process simpler. | ||||
|   * | ||||
|   */ | ||||
| // TODO specialize Cwise | ||||
| template<typename _Scalar> class AlignedVector3; | ||||
|  | ||||
| namespace internal { | ||||
| template<typename _Scalar> struct traits<AlignedVector3<_Scalar> > | ||||
|   : traits<Matrix<_Scalar,3,1,0,4,1> > | ||||
| { | ||||
| }; | ||||
| } | ||||
|  | ||||
| template<typename _Scalar> class AlignedVector3 | ||||
|   : public MatrixBase<AlignedVector3<_Scalar> > | ||||
| { | ||||
|     typedef Matrix<_Scalar,4,1> CoeffType; | ||||
|     CoeffType m_coeffs; | ||||
|   public: | ||||
|  | ||||
|     typedef MatrixBase<AlignedVector3<_Scalar> > Base;	 | ||||
|     EIGEN_DENSE_PUBLIC_INTERFACE(AlignedVector3) | ||||
|     using Base::operator*; | ||||
|  | ||||
|     inline Index rows() const { return 3; } | ||||
|     inline Index cols() const { return 1; } | ||||
|      | ||||
|     Scalar* data() { return m_coeffs.data(); } | ||||
|     const Scalar* data() const { return m_coeffs.data(); } | ||||
|     Index innerStride() const { return 1; } | ||||
|     Index outerStride() const { return 3; } | ||||
|  | ||||
|     inline const Scalar& coeff(Index row, Index col) const | ||||
|     { return m_coeffs.coeff(row, col); } | ||||
|  | ||||
|     inline Scalar& coeffRef(Index row, Index col) | ||||
|     { return m_coeffs.coeffRef(row, col); } | ||||
|  | ||||
|     inline const Scalar& coeff(Index index) const | ||||
|     { return m_coeffs.coeff(index); } | ||||
|  | ||||
|     inline Scalar& coeffRef(Index index) | ||||
|     { return m_coeffs.coeffRef(index);} | ||||
|  | ||||
|  | ||||
|     inline AlignedVector3(const Scalar& x, const Scalar& y, const Scalar& z) | ||||
|       : m_coeffs(x, y, z, Scalar(0)) | ||||
|     {} | ||||
|  | ||||
|     inline AlignedVector3(const AlignedVector3& other) | ||||
|       : Base(), m_coeffs(other.m_coeffs) | ||||
|     {} | ||||
|  | ||||
|     template<typename XprType, int Size=XprType::SizeAtCompileTime> | ||||
|     struct generic_assign_selector {}; | ||||
|  | ||||
|     template<typename XprType> struct generic_assign_selector<XprType,4> | ||||
|     { | ||||
|       inline static void run(AlignedVector3& dest, const XprType& src) | ||||
|       { | ||||
|         dest.m_coeffs = src; | ||||
|       } | ||||
|     }; | ||||
|  | ||||
|     template<typename XprType> struct generic_assign_selector<XprType,3> | ||||
|     { | ||||
|       inline static void run(AlignedVector3& dest, const XprType& src) | ||||
|       { | ||||
|         dest.m_coeffs.template head<3>() = src; | ||||
|         dest.m_coeffs.w() = Scalar(0); | ||||
|       } | ||||
|     }; | ||||
|  | ||||
|     template<typename Derived> | ||||
|     inline AlignedVector3(const MatrixBase<Derived>& other) | ||||
|     { | ||||
|       generic_assign_selector<Derived>::run(*this,other.derived()); | ||||
|     } | ||||
|  | ||||
|     inline AlignedVector3& operator=(const AlignedVector3& other) | ||||
|     { m_coeffs = other.m_coeffs; return *this; } | ||||
|  | ||||
|     template <typename Derived> | ||||
|     inline AlignedVector3& operator=(const MatrixBase<Derived>& other) | ||||
|     { | ||||
|       generic_assign_selector<Derived>::run(*this,other.derived()); | ||||
|       return *this; | ||||
|     } | ||||
|  | ||||
|     inline AlignedVector3 operator+(const AlignedVector3& other) const | ||||
|     { return AlignedVector3(m_coeffs + other.m_coeffs); } | ||||
|  | ||||
|     inline AlignedVector3& operator+=(const AlignedVector3& other) | ||||
|     { m_coeffs += other.m_coeffs; return *this; } | ||||
|  | ||||
|     inline AlignedVector3 operator-(const AlignedVector3& other) const | ||||
|     { return AlignedVector3(m_coeffs - other.m_coeffs); } | ||||
|  | ||||
|     inline AlignedVector3& operator-=(const AlignedVector3& other) | ||||
|     { m_coeffs -= other.m_coeffs; return *this; } | ||||
|  | ||||
|     inline AlignedVector3 operator*(const Scalar& s) const | ||||
|     { return AlignedVector3(m_coeffs * s); } | ||||
|  | ||||
|     inline friend AlignedVector3 operator*(const Scalar& s,const AlignedVector3& vec) | ||||
|     { return AlignedVector3(s * vec.m_coeffs); } | ||||
|  | ||||
|     inline AlignedVector3& operator*=(const Scalar& s) | ||||
|     { m_coeffs *= s; return *this; } | ||||
|  | ||||
|     inline AlignedVector3 operator/(const Scalar& s) const | ||||
|     { return AlignedVector3(m_coeffs / s); } | ||||
|  | ||||
|     inline AlignedVector3& operator/=(const Scalar& s) | ||||
|     { m_coeffs /= s; return *this; } | ||||
|  | ||||
|     inline Scalar dot(const AlignedVector3& other) const | ||||
|     { | ||||
|       eigen_assert(m_coeffs.w()==Scalar(0)); | ||||
|       eigen_assert(other.m_coeffs.w()==Scalar(0)); | ||||
|       return m_coeffs.dot(other.m_coeffs); | ||||
|     } | ||||
|  | ||||
|     inline void normalize() | ||||
|     { | ||||
|       m_coeffs /= norm(); | ||||
|     } | ||||
|  | ||||
|     inline AlignedVector3 normalized() const | ||||
|     { | ||||
|       return AlignedVector3(m_coeffs / norm()); | ||||
|     } | ||||
|  | ||||
|     inline Scalar sum() const | ||||
|     { | ||||
|       eigen_assert(m_coeffs.w()==Scalar(0)); | ||||
|       return m_coeffs.sum(); | ||||
|     } | ||||
|  | ||||
|     inline Scalar squaredNorm() const | ||||
|     { | ||||
|       eigen_assert(m_coeffs.w()==Scalar(0)); | ||||
|       return m_coeffs.squaredNorm(); | ||||
|     } | ||||
|  | ||||
|     inline Scalar norm() const | ||||
|     { | ||||
|       using std::sqrt; | ||||
|       return sqrt(squaredNorm()); | ||||
|     } | ||||
|  | ||||
|     inline AlignedVector3 cross(const AlignedVector3& other) const | ||||
|     { | ||||
|       return AlignedVector3(m_coeffs.cross3(other.m_coeffs)); | ||||
|     } | ||||
|  | ||||
|     template<typename Derived> | ||||
|     inline bool isApprox(const MatrixBase<Derived>& other, const RealScalar& eps=NumTraits<Scalar>::dummy_precision()) const | ||||
|     { | ||||
|       return m_coeffs.template head<3>().isApprox(other,eps); | ||||
|     } | ||||
|      | ||||
|     CoeffType& coeffs() { return m_coeffs; } | ||||
|     const CoeffType& coeffs() const { return m_coeffs; } | ||||
| }; | ||||
|  | ||||
| namespace internal { | ||||
|  | ||||
| template<typename _Scalar> | ||||
| struct eval<AlignedVector3<_Scalar>, Dense> | ||||
| { | ||||
|  typedef const AlignedVector3<_Scalar>& type; | ||||
| }; | ||||
|  | ||||
| template<typename Scalar> | ||||
| struct evaluator<AlignedVector3<Scalar> > | ||||
|   : evaluator<Matrix<Scalar,4,1> > | ||||
| { | ||||
|   typedef AlignedVector3<Scalar> XprType; | ||||
|   typedef evaluator<Matrix<Scalar,4,1> > Base; | ||||
|    | ||||
|   evaluator(const XprType &m) : Base(m.coeffs()) {}   | ||||
| }; | ||||
|  | ||||
| } | ||||
|  | ||||
| //@} | ||||
|  | ||||
| } | ||||
|  | ||||
| #endif // EIGEN_ALIGNED_VECTOR3 | ||||
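A short sketch of using the class above as a drop-in replacement for Vector3f (the values are arbitrary):

#include <unsupported/Eigen/AlignedVector3>
#include <iostream>

int main()
{
  Eigen::AlignedVector3<float> a(1.f, 2.f, 3.f);
  Eigen::AlignedVector3<float> b(4.f, 5.f, 6.f);

  // Behaves like a Vector3f, but is stored as a 4-component vector with
  // w == 0 so that SIMD packets can be used without masking.
  float d = a.dot(b);
  Eigen::AlignedVector3<float> c = a.cross(b);
  std::cout << d << "\n" << c << "\n";
}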
							
								
								
									
external/include/eigen3/unsupported/Eigen/ArpackSupport (new vendored file, 31 lines)
| @@ -0,0 +1,31 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_ARPACKSUPPORT_MODULE_H | ||||
| #define EIGEN_ARPACKSUPPORT_MODULE_H | ||||
|  | ||||
| #include <Eigen/Core> | ||||
|  | ||||
| #include <Eigen/src/Core/util/DisableStupidWarnings.h> | ||||
|  | ||||
| /** \defgroup ArpackSupport_Module Arpack support module | ||||
|   * | ||||
|   * This module provides a wrapper to Arpack, a library for sparse eigenvalue decomposition. | ||||
|   * | ||||
|   * \code | ||||
|   * #include <Eigen/ArpackSupport> | ||||
|   * \endcode | ||||
|   */ | ||||
|  | ||||
| #include <Eigen/SparseCholesky> | ||||
| #include "src/Eigenvalues/ArpackSelfAdjointEigenSolver.h" | ||||
|  | ||||
| #include <Eigen/src/Core/util/ReenableStupidWarnings.h> | ||||
|  | ||||
| #endif // EIGEN_ARPACKSUPPORT_MODULE_H | ||||
| /* vim: set filetype=cpp et sw=2 ts=2 ai: */ | ||||
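For orientation, a rough sketch of how the solver pulled in by this header is typically driven. The class name ArpackGeneralizedSelfAdjointEigenSolver and its constructor arguments below are recalled from the wrapped header and should be double-checked against the vendored src/Eigenvalues/ArpackSelfAdjointEigenSolver.h; the program must also be linked against an ARPACK build, and the identity matrix is only a placeholder:

#include <Eigen/Sparse>
#include <unsupported/Eigen/ArpackSupport>
#include <iostream>

int main()
{
  // Placeholder symmetric sparse matrix; a real problem would assemble one.
  Eigen::SparseMatrix<double> A(100, 100);
  A.setIdentity();

  // Ask ARPACK for the 3 eigenvalues of smallest magnitude ("SM").
  Eigen::ArpackGeneralizedSelfAdjointEigenSolver<Eigen::SparseMatrix<double> >
      solver(A, 3, "SM");
  if (solver.info() == Eigen::Success)
    std::cout << solver.eigenvalues().transpose() << "\n";
}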
							
								
								
									
external/include/eigen3/unsupported/Eigen/AutoDiff (new vendored file, 40 lines)
| @@ -0,0 +1,40 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_AUTODIFF_MODULE | ||||
| #define EIGEN_AUTODIFF_MODULE | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** | ||||
|   * \defgroup AutoDiff_Module Auto Diff module | ||||
|   * | ||||
|   * This module features forward automatic differentiation via a simple | ||||
|   * templated scalar type wrapper AutoDiffScalar. | ||||
|   * | ||||
|   * Warning: this should NOT be confused with numerical differentiation, which | ||||
|   * is a different method and has its own module in Eigen: \ref NumericalDiff_Module. | ||||
|   * | ||||
|   * \code | ||||
|   * #include <unsupported/Eigen/AutoDiff> | ||||
|   * \endcode | ||||
|   */ | ||||
| //@{ | ||||
|  | ||||
| } | ||||
|  | ||||
| #include "src/AutoDiff/AutoDiffScalar.h" | ||||
| // #include "src/AutoDiff/AutoDiffVector.h" | ||||
| #include "src/AutoDiff/AutoDiffJacobian.h" | ||||
|  | ||||
| namespace Eigen { | ||||
| //@} | ||||
| } | ||||
|  | ||||
| #endif // EIGEN_AUTODIFF_MODULE | ||||
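A small sketch of the AutoDiffScalar wrapper this module provides, with made-up values; each input is seeded with a unit derivative vector so that f.derivatives() ends up holding the gradient:

#include <unsupported/Eigen/AutoDiff>
#include <iostream>

int main()
{
  // A scalar carrying its value plus two partial derivatives.
  typedef Eigen::AutoDiffScalar<Eigen::Vector2d> AD;

  AD x(1.0, 2, 0);          // value 1.0, derivative seed e0
  AD y(2.0, 2, 1);          // value 2.0, derivative seed e1

  AD f = x * y + sin(x);    // forward-mode AD through an ordinary expression

  std::cout << f.value() << "\n";                    // 2 + sin(1)
  std::cout << f.derivatives().transpose() << "\n";  // (2 + cos(1), 1)
}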
							
								
								
									
external/include/eigen3/unsupported/Eigen/BVH (new vendored file, 95 lines)
| @@ -0,0 +1,95 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2009 Ilya Baran <ibaran@mit.edu> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_BVH_MODULE_H | ||||
| #define EIGEN_BVH_MODULE_H | ||||
|  | ||||
| #include <Eigen/Core> | ||||
| #include <Eigen/Geometry> | ||||
| #include <Eigen/StdVector> | ||||
| #include <algorithm> | ||||
| #include <queue> | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** | ||||
|   * \defgroup BVH_Module BVH module | ||||
|   * \brief This module provides generic bounding volume hierarchy algorithms | ||||
|   * and reference tree implementations. | ||||
|   * | ||||
|   * | ||||
|   * \code | ||||
|   * #include <unsupported/Eigen/BVH> | ||||
|   * \endcode | ||||
|   * | ||||
|   * A bounding volume hierarchy (BVH) can accelerate many geometric queries.  This module provides a generic implementation | ||||
|   * of the two basic algorithms over a BVH: intersection of a query object against all objects in the hierarchy and minimization | ||||
|   * of a function over the objects in the hierarchy.  It also provides intersection and minimization over a cartesian product of | ||||
|   * two BVH's.  A BVH accelerates intersection by using the fact that if a query object does not intersect a volume, then it cannot | ||||
|   * intersect any object contained in that volume.  Similarly, a BVH accelerates minimization because the minimum of a function | ||||
|   * over a volume is no greater than the minimum of a function over any object contained in it. | ||||
|   * | ||||
|   * Some sample queries that can be written in terms of intersection are: | ||||
|   *   - Determine all points where a ray intersects a triangle mesh | ||||
|   *   - Given a set of points, determine which are contained in a query sphere | ||||
|   *   - Given a set of spheres, determine which contain the query point | ||||
|   *   - Given a set of disks, determine if any is completely contained in a query rectangle (represent each 2D disk as a point \f$(x,y,r)\f$ | ||||
|   *     in 3D and represent the rectangle as a pyramid based on the original rectangle and shrinking in the \f$r\f$ direction) | ||||
|   *   - Given a set of points, count how many pairs are \f$d\pm\epsilon\f$ apart (done by looking at the cartesian product of the set | ||||
|   *     of points with itself) | ||||
|   * | ||||
|   * Some sample queries that can be written in terms of function minimization over a set of objects are: | ||||
|   *   - Find the intersection between a ray and a triangle mesh closest to the ray origin (function is infinite off the ray) | ||||
|   *   - Given a polyline and a query point, determine the closest point on the polyline to the query | ||||
|   *   - Find the diameter of a point cloud (done by looking at the cartesian product and using negative distance as the function) | ||||
|   *   - Determine how far two meshes are from colliding (this is also a cartesian product query) | ||||
|   * | ||||
|   * This implementation decouples the basic algorithms both from the type of hierarchy (and the types of the bounding volumes) and | ||||
|   * from the particulars of the query.  To enable abstraction from the BVH, the BVH is required to implement a generic mechanism | ||||
|   * for traversal.  To abstract from the query, the query is responsible for keeping track of results. | ||||
|   * | ||||
|   * To be used in the algorithms, a hierarchy must implement the following traversal mechanism (see KdBVH for a sample implementation): \code | ||||
|       typedef Volume  //the type of bounding volume | ||||
|       typedef Object  //the type of object in the hierarchy | ||||
|       typedef Index   //a reference to a node in the hierarchy--typically an int or a pointer | ||||
|       typedef VolumeIterator //an iterator type over node children--returns Index | ||||
|       typedef ObjectIterator //an iterator over object (leaf) children--returns const Object & | ||||
|       Index getRootIndex() const //returns the index of the hierarchy root | ||||
|       const Volume &getVolume(Index index) const //returns the bounding volume of the node at given index | ||||
|       void getChildren(Index index, VolumeIterator &outVBegin, VolumeIterator &outVEnd, | ||||
|                       ObjectIterator &outOBegin, ObjectIterator &outOEnd) const | ||||
|       //getChildren takes a node index and makes [outVBegin, outVEnd) range over its node children | ||||
|       //and [outOBegin, outOEnd) range over its object children | ||||
|     \endcode | ||||
|   * | ||||
|   * To use the hierarchy, call BVIntersect or BVMinimize, passing it a BVH (or two, for cartesian product) and a minimizer or intersector. | ||||
|   * For an intersection query on a single BVH, the intersector encapsulates the query and must provide two functions: | ||||
|   * \code | ||||
|       bool intersectVolume(const Volume &volume) //returns true if the query intersects the volume | ||||
|       bool intersectObject(const Object &object) //returns true if the intersection search should terminate immediately | ||||
|     \endcode | ||||
|   * The guarantee that BVIntersect provides is that intersectObject will be called on every object whose bounding volume | ||||
|   * intersects the query (but possibly on other objects too) unless the search is terminated prematurely.  It is the | ||||
|   * responsibility of the intersectObject function to keep track of the results in whatever manner is appropriate. | ||||
|   * The cartesian product intersection and the BVMinimize queries are similar--see their individual documentation. | ||||
|   * | ||||
|   * The following is a simple but complete example for how to use the BVH to accelerate the search for a closest red-blue point pair: | ||||
|   * \include BVH_Example.cpp | ||||
|   * Output: \verbinclude BVH_Example.out | ||||
|   */ | ||||
| } | ||||
|  | ||||
| //@{ | ||||
|  | ||||
| #include "src/BVH/BVAlgorithms.h" | ||||
| #include "src/BVH/KdBVH.h" | ||||
|  | ||||
| //@} | ||||
|  | ||||
| #endif // EIGEN_BVH_MODULE_H | ||||
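The traversal contract described above can be exercised with the bundled KdBVH. A minimal sketch follows; the intersector, the point set and the radius are invented for illustration, and, as in the module's own example, a bounding_box() helper for the object type is supplied in namespace Eigen::internal:

#include <unsupported/Eigen/BVH>
#include <iostream>
#include <vector>

namespace Eigen { namespace internal {
// Bounding volume of a single point, as required by KdBVH.
AlignedBox3d bounding_box(const Vector3d& p) { return AlignedBox3d(p, p); }
} }

// Counts the points of the hierarchy that fall inside a query ball.
struct BallIntersector
{
  Eigen::Vector3d center;
  double radius;
  int hits;

  bool intersectVolume(const Eigen::AlignedBox3d& box)
  { return box.squaredExteriorDistance(center) <= radius * radius; }

  bool intersectObject(const Eigen::Vector3d& p)
  {
    if ((p - center).squaredNorm() <= radius * radius) ++hits;
    return false;  // never terminate early: we want every hit
  }
};

int main()
{
  std::vector<Eigen::Vector3d> pts;
  pts.push_back(Eigen::Vector3d(0, 0, 0));
  pts.push_back(Eigen::Vector3d(1, 1, 1));
  pts.push_back(Eigen::Vector3d(5, 5, 5));

  Eigen::KdBVH<double, 3, Eigen::Vector3d> tree(pts.begin(), pts.end());

  BallIntersector q;
  q.center = Eigen::Vector3d::Zero();
  q.radius = 2.0;
  q.hits   = 0;
  Eigen::BVIntersect(tree, q);
  std::cout << q.hits << " points inside the ball\n";  // expected: 2
}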
							
								
								
									
external/include/eigen3/unsupported/Eigen/CXX11/Tensor (new vendored file, 154 lines)
| @@ -0,0 +1,154 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // Copyright (C) 2013 Christian Seiler <christian@iwakd.de> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| //#ifndef EIGEN_CXX11_TENSOR_MODULE | ||||
| //#define EIGEN_CXX11_TENSOR_MODULE | ||||
|  | ||||
| #include "../../../Eigen/Core" | ||||
|  | ||||
| #ifdef EIGEN_USE_SYCL | ||||
| #undef min | ||||
| #undef max | ||||
| #undef isnan | ||||
| #undef isinf | ||||
| #undef isfinite | ||||
| #include <SYCL/sycl.hpp> | ||||
| #include <map> | ||||
| #include <memory> | ||||
| #include <utility> | ||||
| #endif | ||||
|  | ||||
| #include <Eigen/src/Core/util/DisableStupidWarnings.h> | ||||
|  | ||||
| #include "../SpecialFunctions" | ||||
| #include "src/util/CXX11Meta.h" | ||||
| #include "src/util/MaxSizeVector.h" | ||||
|  | ||||
| /** \defgroup CXX11_Tensor_Module Tensor Module | ||||
|   * | ||||
|   * This module provides a Tensor class for storing arbitrarily indexed | ||||
|   * objects. | ||||
|   * | ||||
|   * \code | ||||
|   * #include <Eigen/CXX11/Tensor> | ||||
|   * \endcode | ||||
|   * | ||||
|   * Much of the documentation can be found \ref eigen_tensors "here". | ||||
|   */ | ||||
|  | ||||
| #include <cmath> | ||||
| #include <cstddef> | ||||
| #include <cstring> | ||||
|  | ||||
| #ifdef _WIN32 | ||||
| typedef __int16 int16_t; | ||||
| typedef unsigned __int16 uint16_t; | ||||
| typedef __int32 int32_t; | ||||
| typedef unsigned __int32 uint32_t; | ||||
| typedef __int64 int64_t; | ||||
| typedef unsigned __int64 uint64_t; | ||||
| #else | ||||
| #include <stdint.h> | ||||
| #endif | ||||
|  | ||||
| #if __cplusplus > 199711 || EIGEN_COMP_MSVC >= 1900 | ||||
| #include <random> | ||||
| #endif | ||||
|  | ||||
| #ifdef _WIN32 | ||||
| #include <windows.h> | ||||
| #elif defined(__APPLE__) | ||||
| #include <mach/mach_time.h> | ||||
| #else | ||||
| #include <time.h> | ||||
| #endif | ||||
|  | ||||
| #ifdef EIGEN_USE_THREADS | ||||
| #include "ThreadPool" | ||||
| #endif | ||||
|  | ||||
| #ifdef EIGEN_USE_GPU | ||||
| #include <iostream> | ||||
| #include <cuda_runtime.h> | ||||
| #if __cplusplus >= 201103L | ||||
| #include <atomic> | ||||
| #include <unistd.h> | ||||
| #endif | ||||
| #endif | ||||
|  | ||||
| #include "src/Tensor/TensorMacros.h" | ||||
| #include "src/Tensor/TensorForwardDeclarations.h" | ||||
| #include "src/Tensor/TensorMeta.h" | ||||
| #include "src/Tensor/TensorFunctors.h" | ||||
| #include "src/Tensor/TensorCostModel.h" | ||||
| #include "src/Tensor/TensorDeviceDefault.h" | ||||
| #include "src/Tensor/TensorDeviceThreadPool.h" | ||||
| #include "src/Tensor/TensorDeviceCuda.h" | ||||
| #include "src/Tensor/TensorDeviceSycl.h" | ||||
| #include "src/Tensor/TensorIndexList.h" | ||||
| #include "src/Tensor/TensorDimensionList.h" | ||||
| #include "src/Tensor/TensorDimensions.h" | ||||
| #include "src/Tensor/TensorInitializer.h" | ||||
| #include "src/Tensor/TensorTraits.h" | ||||
| #include "src/Tensor/TensorRandom.h" | ||||
| #include "src/Tensor/TensorUInt128.h" | ||||
| #include "src/Tensor/TensorIntDiv.h" | ||||
| #include "src/Tensor/TensorGlobalFunctions.h" | ||||
|  | ||||
| #include "src/Tensor/TensorBase.h" | ||||
|  | ||||
| #include "src/Tensor/TensorEvaluator.h" | ||||
| #include "src/Tensor/TensorExpr.h" | ||||
| #include "src/Tensor/TensorReduction.h" | ||||
| #include "src/Tensor/TensorReductionCuda.h" | ||||
| #include "src/Tensor/TensorArgMax.h" | ||||
| #include "src/Tensor/TensorConcatenation.h" | ||||
| #include "src/Tensor/TensorContractionMapper.h" | ||||
| #include "src/Tensor/TensorContractionBlocking.h" | ||||
| #include "src/Tensor/TensorContraction.h" | ||||
| #include "src/Tensor/TensorContractionThreadPool.h" | ||||
| #include "src/Tensor/TensorContractionCuda.h" | ||||
| #include "src/Tensor/TensorConversion.h" | ||||
| #include "src/Tensor/TensorConvolution.h" | ||||
| #include "src/Tensor/TensorFFT.h" | ||||
| #include "src/Tensor/TensorPatch.h" | ||||
| #include "src/Tensor/TensorImagePatch.h" | ||||
| #include "src/Tensor/TensorVolumePatch.h" | ||||
| #include "src/Tensor/TensorBroadcasting.h" | ||||
| #include "src/Tensor/TensorChipping.h" | ||||
| #include "src/Tensor/TensorInflation.h" | ||||
| #include "src/Tensor/TensorLayoutSwap.h" | ||||
| #include "src/Tensor/TensorMorphing.h" | ||||
| #include "src/Tensor/TensorPadding.h" | ||||
| #include "src/Tensor/TensorReverse.h" | ||||
| #include "src/Tensor/TensorShuffling.h" | ||||
| #include "src/Tensor/TensorStriding.h" | ||||
| #include "src/Tensor/TensorCustomOp.h" | ||||
| #include "src/Tensor/TensorEvalTo.h" | ||||
| #include "src/Tensor/TensorForcedEval.h" | ||||
| #include "src/Tensor/TensorGenerator.h" | ||||
| #include "src/Tensor/TensorAssign.h" | ||||
| #include "src/Tensor/TensorScan.h" | ||||
|  | ||||
| #include "src/Tensor/TensorSycl.h" | ||||
| #include "src/Tensor/TensorExecutor.h" | ||||
| #include "src/Tensor/TensorDevice.h" | ||||
|  | ||||
| #include "src/Tensor/TensorStorage.h" | ||||
| #include "src/Tensor/Tensor.h" | ||||
| #include "src/Tensor/TensorFixedSize.h" | ||||
| #include "src/Tensor/TensorMap.h" | ||||
| #include "src/Tensor/TensorRef.h" | ||||
|  | ||||
| #include "src/Tensor/TensorIO.h" | ||||
|  | ||||
| #include <Eigen/src/Core/util/ReenableStupidWarnings.h> | ||||
|  | ||||
| //#endif // EIGEN_CXX11_TENSOR_MODULE | ||||
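For reference, a small sketch of the Tensor API that this umbrella header pulls in (shapes and values are arbitrary):

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main()
{
  // A rank-3 tensor of floats, 2 x 3 x 4, column-major by default.
  Eigen::Tensor<float, 3> t(2, 3, 4);
  t.setConstant(1.0f);
  t(0, 1, 2) = 42.0f;

  // Contraction over the last index of a and the first index of b,
  // i.e. an ordinary matrix product written as a tensor operation.
  Eigen::Tensor<float, 2> a(2, 3), b(3, 4);
  a.setRandom();
  b.setRandom();
  Eigen::array<Eigen::IndexPair<int>, 1> dims;
  dims[0] = Eigen::IndexPair<int>(1, 0);
  Eigen::Tensor<float, 2> c = a.contract(b, dims);  // 2 x 4 result

  std::cout << t(0, 1, 2) << "\n" << c << "\n";
}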
							
								
								
									
external/include/eigen3/unsupported/Eigen/CXX11/TensorSymmetry (new vendored file, 42 lines)
| @@ -0,0 +1,42 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2013 Christian Seiler <christian@iwakd.de> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSORSYMMETRY_MODULE | ||||
| #define EIGEN_CXX11_TENSORSYMMETRY_MODULE | ||||
|  | ||||
| #include <unsupported/Eigen/CXX11/Tensor> | ||||
|  | ||||
| #include <Eigen/src/Core/util/DisableStupidWarnings.h> | ||||
|  | ||||
| #include "src/util/CXX11Meta.h" | ||||
|  | ||||
| /** \defgroup CXX11_TensorSymmetry_Module Tensor Symmetry Module | ||||
|   * | ||||
|   * This module provides classes that allow for the definition of | ||||
|   * symmetries w.r.t. tensor indices. | ||||
|   * | ||||
|   * Including this module will implicitly include the Tensor module. | ||||
|   * | ||||
|   * \code | ||||
|   * #include <Eigen/TensorSymmetry> | ||||
|   * \endcode | ||||
|   */ | ||||
|  | ||||
| #include "src/TensorSymmetry/util/TemplateGroupTheory.h" | ||||
| #include "src/TensorSymmetry/Symmetry.h" | ||||
| #include "src/TensorSymmetry/StaticSymmetry.h" | ||||
| #include "src/TensorSymmetry/DynamicSymmetry.h" | ||||
|  | ||||
| #include <Eigen/src/Core/util/ReenableStupidWarnings.h> | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSORSYMMETRY_MODULE | ||||
|  | ||||
| /* | ||||
|  * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; | ||||
|  */ | ||||
							
								
								
									
external/include/eigen3/unsupported/Eigen/CXX11/ThreadPool (new vendored file, 65 lines)
| @@ -0,0 +1,65 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_THREADPOOL_MODULE | ||||
| #define EIGEN_CXX11_THREADPOOL_MODULE | ||||
|  | ||||
| #include "../../../Eigen/Core" | ||||
|  | ||||
| #include <Eigen/src/Core/util/DisableStupidWarnings.h> | ||||
|  | ||||
| /** \defgroup CXX11_ThreadPool_Module C++11 ThreadPool Module | ||||
|   * | ||||
|   * This module provides two thread pool implementations: | ||||
|   *  - a simple reference implementation | ||||
|   *  - a faster non-blocking implementation | ||||
|   * | ||||
|   * This module requires C++11. | ||||
|   * | ||||
|   * \code | ||||
|   * #include <Eigen/CXX11/ThreadPool> | ||||
|   * \endcode | ||||
|   */ | ||||
|  | ||||
|  | ||||
| // The code depends on CXX11, so only include the module if the | ||||
| // compiler supports it. | ||||
| #if __cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900 | ||||
| #include <cstddef> | ||||
| #include <cstring> | ||||
| #include <stdint.h> | ||||
| #include <time.h> | ||||
|  | ||||
| #include <vector> | ||||
| #include <atomic> | ||||
| #include <condition_variable> | ||||
| #include <deque> | ||||
| #include <mutex> | ||||
| #include <thread> | ||||
| #include <functional> | ||||
| #include <memory> | ||||
|  | ||||
| #include "src/util/CXX11Meta.h" | ||||
| #include "src/util/MaxSizeVector.h" | ||||
|  | ||||
| #include "src/ThreadPool/ThreadLocal.h" | ||||
| #include "src/ThreadPool/ThreadYield.h" | ||||
| #include "src/ThreadPool/EventCount.h" | ||||
| #include "src/ThreadPool/RunQueue.h" | ||||
| #include "src/ThreadPool/ThreadPoolInterface.h" | ||||
| #include "src/ThreadPool/ThreadEnvironment.h" | ||||
| #include "src/ThreadPool/SimpleThreadPool.h" | ||||
| #include "src/ThreadPool/NonBlockingThreadPool.h" | ||||
|  | ||||
| #endif | ||||
|  | ||||
| #include <Eigen/src/Core/util/ReenableStupidWarnings.h> | ||||
|  | ||||
| #endif // EIGEN_CXX11_THREADPOOL_MODULE | ||||
|  | ||||
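A brief sketch of driving the non-blocking pool exposed by this header directly; the task count and the crude spin-wait are only for illustration (real code would block on a barrier or notification instead):

#include <unsupported/Eigen/CXX11/ThreadPool>
#include <atomic>
#include <cstdio>
#include <thread>

int main()
{
  Eigen::NonBlockingThreadPool pool(4);  // 4 worker threads
  std::atomic<int> done(0);

  for (int i = 0; i < 8; ++i)
    pool.Schedule([&done] { done.fetch_add(1); });

  while (done.load() != 8)               // crude wait, illustration only
    std::this_thread::yield();
  std::printf("all %d tasks finished\n", done.load());
}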
							
								
								
									
external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/Tensor.h (new vendored file, 527 lines)
| @@ -0,0 +1,527 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // Copyright (C) 2013 Christian Seiler <christian@iwakd.de> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \class Tensor | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief The tensor class. | ||||
|   * | ||||
|   * The %Tensor class is the work-horse for all \em dense tensors within Eigen. | ||||
|   * | ||||
|   * The %Tensor class encompasses only dynamic-size objects so far. | ||||
|   * | ||||
|   * The first two template parameters are required: | ||||
|   * \tparam Scalar_  Numeric type, e.g. float, double, int or `std::complex<float>`. | ||||
|   *                 User defined scalar types are supported as well (see \ref user_defined_scalars "here"). | ||||
|   * \tparam NumIndices_ Number of indices (i.e. rank of the tensor) | ||||
|   * | ||||
|   * The remaining template parameters are optional -- in most cases you don't have to worry about them. | ||||
|   * \tparam Options_  A combination of either \b #RowMajor or \b #ColMajor, and of either | ||||
|   *                 \b #AutoAlign or \b #DontAlign. | ||||
|   *                 The former controls \ref TopicStorageOrders "storage order", and defaults to column-major. The latter controls alignment, which is required | ||||
|   *                 for vectorization. It defaults to aligning tensors. Note that tensors currently do not support any operations that profit from vectorization. | ||||
|   *                 Support for such operations (i.e. adding two tensors etc.) is planned. | ||||
|   * | ||||
|   * You can access elements of tensors using normal subscripting: | ||||
|   * | ||||
|   * \code | ||||
|   * Eigen::Tensor<double, 4> t(10, 10, 10, 10); | ||||
|   * t(0, 1, 2, 3) = 42.0; | ||||
|   * \endcode | ||||
|   * | ||||
|   * This class can be extended with the help of the plugin mechanism described on the page | ||||
|   * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_TENSOR_PLUGIN. | ||||
|   * | ||||
|   * <i><b>Some notes:</b></i> | ||||
|   * | ||||
|   * <dl> | ||||
|   * <dt><b>Relation to other parts of Eigen:</b></dt> | ||||
|   * <dd>The midterm development goal for this class is to have a similar hierarchy as Eigen uses for matrices, so that | ||||
|   * taking blocks or using tensors in expressions is easily possible, including an interface with the vector/matrix code | ||||
|   * by providing .asMatrix() and .asVector() (or similar) methods for rank 2 and 1 tensors. However, currently, the %Tensor | ||||
|   * class does not provide any of these features and is only available as a stand-alone class that just allows for | ||||
|   * coefficient access. Also, when fixed-size tensors are implemented, the number of template arguments is likely to | ||||
|   * change dramatically.</dd> | ||||
|   * </dl> | ||||
|   * | ||||
|   * \ref TopicStorageOrders | ||||
|   */ | ||||
|  | ||||
| template<typename Scalar_, int NumIndices_, int Options_, typename IndexType_> | ||||
| class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexType_> > | ||||
| { | ||||
|   public: | ||||
|     typedef Tensor<Scalar_, NumIndices_, Options_, IndexType_> Self; | ||||
|     typedef TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexType_> > Base; | ||||
|     typedef typename Eigen::internal::nested<Self>::type Nested; | ||||
|     typedef typename internal::traits<Self>::StorageKind StorageKind; | ||||
|     typedef typename internal::traits<Self>::Index Index; | ||||
|     typedef Scalar_ Scalar; | ||||
|     typedef typename NumTraits<Scalar>::Real RealScalar; | ||||
|     typedef typename Base::CoeffReturnType CoeffReturnType; | ||||
|  | ||||
|     enum { | ||||
|       IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) & !(Options_&DontAlign), | ||||
|       Layout = Options_ & RowMajor ? RowMajor : ColMajor, | ||||
|       CoordAccess = true, | ||||
|       RawAccess = true | ||||
|     }; | ||||
|  | ||||
|     static const int Options = Options_; | ||||
|     static const int NumIndices = NumIndices_; | ||||
|     typedef DSizes<Index, NumIndices_> Dimensions; | ||||
|  | ||||
|   protected: | ||||
|     TensorStorage<Scalar, Dimensions, Options> m_storage; | ||||
|  | ||||
| #ifdef EIGEN_HAS_SFINAE | ||||
|     template<typename CustomIndices> | ||||
|     struct isOfNormalIndex{ | ||||
|       static const bool is_array = internal::is_base_of<array<Index, NumIndices>, CustomIndices>::value; | ||||
|       static const bool is_int = NumTraits<CustomIndices>::IsInteger; | ||||
|       static const bool value = is_array | is_int; | ||||
|     }; | ||||
| #endif | ||||
|  | ||||
|   public: | ||||
|     // Metadata | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index                         rank()                   const { return NumIndices; } | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index                         dimension(std::size_t n) const { return m_storage.dimensions()[n]; } | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions&             dimensions()             const { return m_storage.dimensions(); } | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index                         size()                   const { return m_storage.size(); } | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar                        *data()                        { return m_storage.data(); } | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar                  *data()                  const { return m_storage.data(); } | ||||
|  | ||||
|     // This makes EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED | ||||
|     // work, because that uses base().coeffRef() - and we don't yet | ||||
|     // implement a similar class hierarchy | ||||
|     inline Self& base()             { return *this; } | ||||
|     inline const Self& base() const { return *this; } | ||||
|  | ||||
| #if EIGEN_HAS_VARIADIC_TEMPLATES | ||||
|     template<typename... IndexTypes> | ||||
|     EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const | ||||
|     { | ||||
|       // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. | ||||
|       EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|       return coeff(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}}); | ||||
|     } | ||||
| #endif | ||||
|  | ||||
|     // normal indices | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(const array<Index, NumIndices>& indices) const | ||||
|     { | ||||
|       eigen_internal_assert(checkIndexRange(indices)); | ||||
|       return m_storage.data()[linearizedIndex(indices)]; | ||||
|     } | ||||
|  | ||||
|     // custom indices | ||||
| #ifdef EIGEN_HAS_SFINAE | ||||
|     template<typename CustomIndices, | ||||
|              EIGEN_SFINAE_ENABLE_IF( !(isOfNormalIndex<CustomIndices>::value) ) | ||||
|     > | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(CustomIndices& indices) const | ||||
|     { | ||||
|         return coeff(internal::customIndices2Array<Index,NumIndices>(indices)); | ||||
|     } | ||||
| #endif | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff() const | ||||
|     { | ||||
|       EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|       return m_storage.data()[0]; | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const | ||||
|     { | ||||
|       eigen_internal_assert(index >= 0 && index < size()); | ||||
|       return m_storage.data()[index]; | ||||
|     } | ||||
|  | ||||
| #if EIGEN_HAS_VARIADIC_TEMPLATES | ||||
|     template<typename... IndexTypes> | ||||
|     inline Scalar& coeffRef(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) | ||||
|     { | ||||
|       // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. | ||||
|       EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|       return coeffRef(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}}); | ||||
|     } | ||||
| #endif | ||||
|  | ||||
|     // normal indices | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(const array<Index, NumIndices>& indices) | ||||
|     { | ||||
|       eigen_internal_assert(checkIndexRange(indices)); | ||||
|       return m_storage.data()[linearizedIndex(indices)]; | ||||
|     } | ||||
|  | ||||
|     // custom indices | ||||
| #ifdef EIGEN_HAS_SFINAE | ||||
|     template<typename CustomIndices, | ||||
|              EIGEN_SFINAE_ENABLE_IF( !(isOfNormalIndex<CustomIndices>::value) ) | ||||
|              > | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(CustomIndices& indices) | ||||
|     { | ||||
|         return coeffRef(internal::customIndices2Array<Index,NumIndices>(indices)); | ||||
|     } | ||||
| #endif | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef() | ||||
|     { | ||||
|       EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|       return m_storage.data()[0]; | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) | ||||
|     { | ||||
|       eigen_internal_assert(index >= 0 && index < size()); | ||||
|       return m_storage.data()[index]; | ||||
|     } | ||||
|  | ||||
| #if EIGEN_HAS_VARIADIC_TEMPLATES | ||||
|     template<typename... IndexTypes> | ||||
|     inline const Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const | ||||
|     { | ||||
|       // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. | ||||
|       EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|       return this->operator()(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}}); | ||||
|     } | ||||
| #else | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const | ||||
|     { | ||||
|       return coeff(array<Index, 2>(i0, i1)); | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const | ||||
|     { | ||||
|       return coeff(array<Index, 3>(i0, i1, i2)); | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3) const | ||||
|     { | ||||
|       return coeff(array<Index, 4>(i0, i1, i2, i3)); | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const | ||||
|     { | ||||
|       return coeff(array<Index, 5>(i0, i1, i2, i3, i4)); | ||||
|     } | ||||
| #endif | ||||
|  | ||||
|     // custom indices | ||||
| #ifdef EIGEN_HAS_SFINAE | ||||
|     template<typename CustomIndices, | ||||
|              EIGEN_SFINAE_ENABLE_IF( !(isOfNormalIndex<CustomIndices>::value) ) | ||||
|     > | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(CustomIndices& indices) const | ||||
|     { | ||||
|         return coeff(internal::customIndices2Array<Index,NumIndices>(indices)); | ||||
|     } | ||||
| #endif | ||||
|  | ||||
|     // normal indices | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(const array<Index, NumIndices>& indices) const | ||||
|     { | ||||
|       return coeff(indices); | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(Index index) const | ||||
|     { | ||||
|       eigen_internal_assert(index >= 0 && index < size()); | ||||
|       return coeff(index); | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()() const | ||||
|     { | ||||
|       EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|       return coeff(); | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator[](Index index) const | ||||
|     { | ||||
|       // The bracket operator is only for vectors, use the parenthesis operator instead. | ||||
|       EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|       return coeff(index); | ||||
|     } | ||||
|  | ||||
| #if EIGEN_HAS_VARIADIC_TEMPLATES | ||||
|     template<typename... IndexTypes> | ||||
|     inline Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) | ||||
|     { | ||||
|       // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. | ||||
|       EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|       return operator()(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}}); | ||||
|     } | ||||
| #else | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1) | ||||
|     { | ||||
|       return coeffRef(array<Index, 2>(i0, i1)); | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2) | ||||
|     { | ||||
|       return coeffRef(array<Index, 3>(i0, i1, i2)); | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3) | ||||
|     { | ||||
|       return coeffRef(array<Index, 4>(i0, i1, i2, i3)); | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) | ||||
|     { | ||||
|       return coeffRef(array<Index, 5>(i0, i1, i2, i3, i4)); | ||||
|     } | ||||
| #endif | ||||
|  | ||||
|     // normal indices | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(const array<Index, NumIndices>& indices) | ||||
|     { | ||||
|       return coeffRef(indices); | ||||
|     } | ||||
|  | ||||
|     // custom indices | ||||
| #ifdef EIGEN_HAS_SFINAE | ||||
|     template<typename CustomIndices, | ||||
|              EIGEN_SFINAE_ENABLE_IF( !(isOfNormalIndex<CustomIndices>::value) ) | ||||
|     > | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(CustomIndices& indices) | ||||
|     { | ||||
|       return coeffRef(internal::customIndices2Array<Index,NumIndices>(indices)); | ||||
|     } | ||||
| #endif | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index index) | ||||
|     { | ||||
|       eigen_assert(index >= 0 && index < size()); | ||||
|       return coeffRef(index); | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()() | ||||
|     { | ||||
|       EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|       return coeffRef(); | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator[](Index index) | ||||
|     { | ||||
|       // The bracket operator is only for vectors, use the parenthesis operator instead | ||||
|       EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|       return coeffRef(index); | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Tensor() | ||||
|       : m_storage() | ||||
|     { | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Tensor(const Self& other) | ||||
|       : m_storage(other.m_storage) | ||||
|     { | ||||
|     } | ||||
|  | ||||
| #if EIGEN_HAS_VARIADIC_TEMPLATES | ||||
|     template<typename... IndexTypes> | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index firstDimension, IndexTypes... otherDimensions) | ||||
|         : m_storage(firstDimension, otherDimensions...) | ||||
|     { | ||||
|       // The number of dimensions used to construct a tensor must be equal to the rank of the tensor. | ||||
|       EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     } | ||||
| #else | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(Index dim1) | ||||
|       : m_storage(dim1, array<Index, 1>(dim1)) | ||||
|     { | ||||
|       EIGEN_STATIC_ASSERT(1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2) | ||||
|       : m_storage(dim1*dim2, array<Index, 2>(dim1, dim2)) | ||||
|     { | ||||
|       EIGEN_STATIC_ASSERT(2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2, Index dim3) | ||||
|       : m_storage(dim1*dim2*dim3, array<Index, 3>(dim1, dim2, dim3)) | ||||
|     { | ||||
|       EIGEN_STATIC_ASSERT(3 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2, Index dim3, Index dim4) | ||||
|       : m_storage(dim1*dim2*dim3*dim4, array<Index, 4>(dim1, dim2, dim3, dim4)) | ||||
|     { | ||||
|       EIGEN_STATIC_ASSERT(4 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2, Index dim3, Index dim4, Index dim5) | ||||
|       : m_storage(dim1*dim2*dim3*dim4*dim5, array<Index, 5>(dim1, dim2, dim3, dim4, dim5)) | ||||
|     { | ||||
|       EIGEN_STATIC_ASSERT(5 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     } | ||||
| #endif | ||||
|  | ||||
|     /** Normal Dimension */ | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(const array<Index, NumIndices>& dimensions) | ||||
|         : m_storage(internal::array_prod(dimensions), dimensions) | ||||
|     { | ||||
|       EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED | ||||
|     } | ||||
|  | ||||
|     template<typename OtherDerived> | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Tensor(const TensorBase<OtherDerived, ReadOnlyAccessors>& other) | ||||
|     { | ||||
|       typedef TensorAssignOp<Tensor, const OtherDerived> Assign; | ||||
|       Assign assign(*this, other.derived()); | ||||
|       resize(TensorEvaluator<const Assign, DefaultDevice>(assign, DefaultDevice()).dimensions()); | ||||
|       internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); | ||||
|     } | ||||
|     template<typename OtherDerived> | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Tensor(const TensorBase<OtherDerived, WriteAccessors>& other) | ||||
|     { | ||||
|       typedef TensorAssignOp<Tensor, const OtherDerived> Assign; | ||||
|       Assign assign(*this, other.derived()); | ||||
|       resize(TensorEvaluator<const Assign, DefaultDevice>(assign, DefaultDevice()).dimensions()); | ||||
|       internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Tensor& operator=(const Tensor& other) | ||||
|     { | ||||
|       typedef TensorAssignOp<Tensor, const Tensor> Assign; | ||||
|       Assign assign(*this, other); | ||||
|       resize(TensorEvaluator<const Assign, DefaultDevice>(assign, DefaultDevice()).dimensions()); | ||||
|       internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); | ||||
|       return *this; | ||||
|     } | ||||
|     template<typename OtherDerived> | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Tensor& operator=(const OtherDerived& other) | ||||
|     { | ||||
|       typedef TensorAssignOp<Tensor, const OtherDerived> Assign; | ||||
|       Assign assign(*this, other); | ||||
|       resize(TensorEvaluator<const Assign, DefaultDevice>(assign, DefaultDevice()).dimensions()); | ||||
|       internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); | ||||
|       return *this; | ||||
|     } | ||||
|  | ||||
| #if EIGEN_HAS_VARIADIC_TEMPLATES | ||||
|     template<typename... IndexTypes> EIGEN_DEVICE_FUNC | ||||
|     void resize(Index firstDimension, IndexTypes... otherDimensions) | ||||
|     { | ||||
|       // The number of dimensions used to resize a tensor must be equal to the rank of the tensor. | ||||
|       EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|       resize(array<Index, NumIndices>{{firstDimension, otherDimensions...}}); | ||||
|     } | ||||
| #endif | ||||
|  | ||||
|     /** Normal Dimension */ | ||||
|     EIGEN_DEVICE_FUNC void resize(const array<Index, NumIndices>& dimensions) | ||||
|     { | ||||
|       int i; | ||||
|       Index size = Index(1); | ||||
|       for (i = 0; i < NumIndices; i++) { | ||||
|         internal::check_rows_cols_for_overflow<Dynamic>::run(size, dimensions[i]); | ||||
|         size *= dimensions[i]; | ||||
|       } | ||||
|       #ifdef EIGEN_INITIALIZE_COEFFS | ||||
|         bool size_changed = size != this->size(); | ||||
|         m_storage.resize(size, dimensions); | ||||
|         if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED | ||||
|       #else | ||||
|         m_storage.resize(size, dimensions); | ||||
|       #endif | ||||
|     } | ||||
|  | ||||
|     // TODO: check why this overload is needed; DSizes derives from array, so resize(array) should suffice. | ||||
|     EIGEN_DEVICE_FUNC void resize(const DSizes<Index, NumIndices>& dimensions) { | ||||
|       array<Index, NumIndices> dims; | ||||
|       for (int i = 0; i < NumIndices; ++i) { | ||||
|         dims[i] = dimensions[i]; | ||||
|       } | ||||
|       resize(dims); | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     void resize() | ||||
|     { | ||||
|       EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|       // Nothing to do: rank 0 tensors have fixed size | ||||
|     } | ||||
|  | ||||
|     /** Custom Dimension */ | ||||
| #ifdef EIGEN_HAS_SFINAE | ||||
|     template<typename CustomDimension, | ||||
|              EIGEN_SFINAE_ENABLE_IF( !(isOfNormalIndex<CustomDimension>::value) ) | ||||
|     > | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(CustomDimension& dimensions) | ||||
|     { | ||||
|       resize(internal::customIndices2Array<Index,NumIndices>(dimensions)); | ||||
|     } | ||||
| #endif | ||||
|  | ||||
| #ifndef EIGEN_EMULATE_CXX11_META_H | ||||
|     template <typename std::ptrdiff_t... Indices> | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     void resize(const Sizes<Indices...>& dimensions) { | ||||
|       array<Index, NumIndices> dims; | ||||
|       for (int i = 0; i < NumIndices; ++i) { | ||||
|         dims[i] = static_cast<Index>(dimensions[i]); | ||||
|       } | ||||
|       resize(dims); | ||||
|     } | ||||
| #else | ||||
|     template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     void resize(const Sizes<V1, V2, V3, V4, V5>& dimensions) { | ||||
|       array<Index, NumIndices> dims; | ||||
|       for (int i = 0; i < NumIndices; ++i) { | ||||
|         dims[i] = static_cast<Index>(dimensions[i]); | ||||
|       } | ||||
|       resize(dims); | ||||
|     } | ||||
| #endif | ||||
|  | ||||
|   protected: | ||||
|  | ||||
|     bool checkIndexRange(const array<Index, NumIndices>& indices) const | ||||
|     { | ||||
|       using internal::array_apply_and_reduce; | ||||
|       using internal::array_zip_and_reduce; | ||||
|       using internal::greater_equal_zero_op; | ||||
|       using internal::logical_and_op; | ||||
|       using internal::lesser_op; | ||||
|  | ||||
|       return | ||||
|         // check whether the indices are all >= 0 | ||||
|         array_apply_and_reduce<logical_and_op, greater_equal_zero_op>(indices) && | ||||
|         // check whether the indices fit in the dimensions | ||||
|         array_zip_and_reduce<logical_and_op, lesser_op>(indices, m_storage.dimensions()); | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index linearizedIndex(const array<Index, NumIndices>& indices) const | ||||
|     { | ||||
|       if (Options&RowMajor) { | ||||
|         return m_storage.dimensions().IndexOfRowMajor(indices); | ||||
|       } else { | ||||
|         return m_storage.dimensions().IndexOfColMajor(indices); | ||||
|       } | ||||
|     } | ||||
| }; | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_H | ||||
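
A minimal usage sketch for the constructors and resize() overloads above (not part of the vendored header; it assumes the unsupported CXX11 Tensor module is on the include path and that variadic-template support is available):

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  // Rank-3 tensor built from per-dimension sizes (variadic constructor).
  Eigen::Tensor<float, 3> a(2, 3, 4);
  a.setConstant(1.0f);

  // Constructing from another expression resizes the destination to match
  // the dimensions reported by the expression's evaluator.
  Eigen::Tensor<float, 3> b = a + a.constant(0.5f);

  // resize() recomputes the total size (with overflow checks) and reallocates;
  // coefficients are only re-initialized if EIGEN_INITIALIZE_COEFFS is defined.
  b.resize(4, 3, 2);
  b.setZero();

  std::cout << "b holds " << b.size() << " coefficients\n";  // 24
  return 0;
}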
							
								
								
									
299
external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h
vendored
Normal file
							| @@ -0,0 +1,299 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2015 Eugene Brevdo <ebrevdo@gmail.com> | ||||
| //                    Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H | ||||
|  | ||||
| namespace Eigen { | ||||
| namespace internal { | ||||
|  | ||||
| /** \class TensorIndexTupleOp | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Tensor + Index Tuple class. | ||||
|   * | ||||
|   * | ||||
|   */ | ||||
| template<typename XprType> | ||||
| struct traits<TensorIndexTupleOp<XprType> > : public traits<XprType> | ||||
| { | ||||
|   typedef traits<XprType> XprTraits; | ||||
|   typedef typename XprTraits::StorageKind StorageKind; | ||||
|   typedef typename XprTraits::Index Index; | ||||
|   typedef Tuple<Index, typename XprTraits::Scalar> Scalar; | ||||
|   typedef typename XprType::Nested Nested; | ||||
|   typedef typename remove_reference<Nested>::type _Nested; | ||||
|   static const int NumDimensions = XprTraits::NumDimensions; | ||||
|   static const int Layout = XprTraits::Layout; | ||||
| }; | ||||
|  | ||||
| template<typename XprType> | ||||
| struct eval<TensorIndexTupleOp<XprType>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorIndexTupleOp<XprType>& type; | ||||
| }; | ||||
|  | ||||
| template<typename XprType> | ||||
| struct nested<TensorIndexTupleOp<XprType>, 1, | ||||
|               typename eval<TensorIndexTupleOp<XprType> >::type> | ||||
| { | ||||
|   typedef TensorIndexTupleOp<XprType> type; | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
| template<typename XprType> | ||||
| class TensorIndexTupleOp : public TensorBase<TensorIndexTupleOp<XprType>, ReadOnlyAccessors> | ||||
| { | ||||
|   public: | ||||
|   typedef typename Eigen::internal::traits<TensorIndexTupleOp>::Scalar Scalar; | ||||
|   typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; | ||||
|   typedef typename Eigen::internal::nested<TensorIndexTupleOp>::type Nested; | ||||
|   typedef typename Eigen::internal::traits<TensorIndexTupleOp>::StorageKind StorageKind; | ||||
|   typedef typename Eigen::internal::traits<TensorIndexTupleOp>::Index Index; | ||||
|   typedef Tuple<Index, typename XprType::CoeffReturnType> CoeffReturnType; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIndexTupleOp(const XprType& expr) | ||||
|       : m_xpr(expr) {} | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   const typename internal::remove_all<typename XprType::Nested>::type& | ||||
|   expression() const { return m_xpr; } | ||||
|  | ||||
|   protected: | ||||
|     typename XprType::Nested m_xpr; | ||||
| }; | ||||
|  | ||||
| // Eval as rvalue | ||||
| template<typename ArgType, typename Device> | ||||
| struct TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device> | ||||
| { | ||||
|   typedef TensorIndexTupleOp<ArgType> XprType; | ||||
|   typedef typename XprType::Index Index; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|  | ||||
|   typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions; | ||||
|   static const int NumDims = internal::array_size<Dimensions>::value; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false, | ||||
|     PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/ false, | ||||
|     BlockAccess = false, | ||||
|     Layout = TensorEvaluator<ArgType, Device>::Layout, | ||||
|     CoordAccess = false,  // to be implemented | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) | ||||
|       : m_impl(op.expression(), device) { } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { | ||||
|     return m_impl.dimensions(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { | ||||
|     m_impl.evalSubExprsIfNeeded(NULL); | ||||
|     return true; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { | ||||
|     m_impl.cleanup(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const | ||||
|   { | ||||
|     return CoeffReturnType(index, m_impl.coeff(index)); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost | ||||
|   costPerCoeff(bool vectorized) const { | ||||
|     return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, 1); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } | ||||
|  | ||||
|  protected: | ||||
|   TensorEvaluator<ArgType, Device> m_impl; | ||||
| }; | ||||
|  | ||||
| namespace internal { | ||||
|  | ||||
| /** \class TensorTupleReducerOp | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Converts to Tensor<Tuple<Index, Scalar> > and reduces to Tensor<Index>. | ||||
|   * | ||||
|   */ | ||||
| template<typename ReduceOp, typename Dims, typename XprType> | ||||
| struct traits<TensorTupleReducerOp<ReduceOp, Dims, XprType> > : public traits<XprType> | ||||
| { | ||||
|   typedef traits<XprType> XprTraits; | ||||
|   typedef typename XprTraits::StorageKind StorageKind; | ||||
|   typedef typename XprTraits::Index Index; | ||||
|   typedef Index Scalar; | ||||
|   typedef typename XprType::Nested Nested; | ||||
|   typedef typename remove_reference<Nested>::type _Nested; | ||||
|   static const int NumDimensions = XprTraits::NumDimensions - array_size<Dims>::value; | ||||
|   static const int Layout = XprTraits::Layout; | ||||
| }; | ||||
|  | ||||
| template<typename ReduceOp, typename Dims, typename XprType> | ||||
| struct eval<TensorTupleReducerOp<ReduceOp, Dims, XprType>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorTupleReducerOp<ReduceOp, Dims, XprType>& type; | ||||
| }; | ||||
|  | ||||
| template<typename ReduceOp, typename Dims, typename XprType> | ||||
| struct nested<TensorTupleReducerOp<ReduceOp, Dims, XprType>, 1, | ||||
|               typename eval<TensorTupleReducerOp<ReduceOp, Dims, XprType> >::type> | ||||
| { | ||||
|   typedef TensorTupleReducerOp<ReduceOp, Dims, XprType> type; | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
| template<typename ReduceOp, typename Dims, typename XprType> | ||||
| class TensorTupleReducerOp : public TensorBase<TensorTupleReducerOp<ReduceOp, Dims, XprType>, ReadOnlyAccessors> | ||||
| { | ||||
|   public: | ||||
|   typedef typename Eigen::internal::traits<TensorTupleReducerOp>::Scalar Scalar; | ||||
|   typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; | ||||
|   typedef typename Eigen::internal::nested<TensorTupleReducerOp>::type Nested; | ||||
|   typedef typename Eigen::internal::traits<TensorTupleReducerOp>::StorageKind StorageKind; | ||||
|   typedef typename Eigen::internal::traits<TensorTupleReducerOp>::Index Index; | ||||
|   typedef Index CoeffReturnType; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorTupleReducerOp(const XprType& expr, | ||||
|                                                           const ReduceOp& reduce_op, | ||||
|                                                           const int return_dim, | ||||
|                                                           const Dims& reduce_dims) | ||||
|       : m_xpr(expr), m_reduce_op(reduce_op), m_return_dim(return_dim), m_reduce_dims(reduce_dims) {} | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   const typename internal::remove_all<typename XprType::Nested>::type& | ||||
|   expression() const { return m_xpr; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   const ReduceOp& reduce_op() const { return m_reduce_op; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   const Dims& reduce_dims() const { return m_reduce_dims; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   int return_dim() const { return m_return_dim; } | ||||
|  | ||||
|   protected: | ||||
|     typename XprType::Nested m_xpr; | ||||
|     const ReduceOp m_reduce_op; | ||||
|     const int m_return_dim; | ||||
|     const Dims m_reduce_dims; | ||||
| }; | ||||
|  | ||||
| // Eval as rvalue | ||||
| template<typename ReduceOp, typename Dims, typename ArgType, typename Device> | ||||
| struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Device> | ||||
| { | ||||
|   typedef TensorTupleReducerOp<ReduceOp, Dims, ArgType> XprType; | ||||
|   typedef typename XprType::Index Index; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename TensorIndexTupleOp<ArgType>::CoeffReturnType TupleType; | ||||
|   typedef typename TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexTupleOp<ArgType> >, Device>::Dimensions Dimensions; | ||||
|   typedef typename TensorEvaluator<const TensorIndexTupleOp<ArgType> , Device>::Dimensions InputDimensions; | ||||
|   static const int NumDims = internal::array_size<InputDimensions>::value; | ||||
|   typedef array<Index, NumDims> StrideDims; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false, | ||||
|     PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/ false, | ||||
|     BlockAccess = false, | ||||
|     Layout = TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexTupleOp<ArgType> >, Device>::Layout, | ||||
|     CoordAccess = false,  // to be implemented | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) | ||||
|       : m_orig_impl(op.expression(), device), | ||||
|         m_impl(op.expression().index_tuples().reduce(op.reduce_dims(), op.reduce_op()), device), | ||||
|         m_return_dim(op.return_dim()) { | ||||
|  | ||||
|     gen_strides(m_orig_impl.dimensions(), m_strides); | ||||
|     if (Layout == static_cast<int>(ColMajor)) { | ||||
|       const Index total_size = internal::array_prod(m_orig_impl.dimensions()); | ||||
|       m_stride_mod = (m_return_dim < NumDims - 1) ? m_strides[m_return_dim + 1] : total_size; | ||||
|     } else { | ||||
|       const Index total_size = internal::array_prod(m_orig_impl.dimensions()); | ||||
|       m_stride_mod = (m_return_dim > 0) ? m_strides[m_return_dim - 1] : total_size; | ||||
|     } | ||||
|     m_stride_div = m_strides[m_return_dim]; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { | ||||
|     return m_impl.dimensions(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { | ||||
|     m_impl.evalSubExprsIfNeeded(NULL); | ||||
|     return true; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { | ||||
|     m_impl.cleanup(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { | ||||
|     const TupleType v = m_impl.coeff(index); | ||||
|     return (m_return_dim < 0) ? v.first : (v.first % m_stride_mod) / m_stride_div; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost | ||||
|   costPerCoeff(bool vectorized) const { | ||||
|     const double compute_cost = 1.0 + | ||||
|         (m_return_dim < 0 ? 0.0 : (TensorOpCost::ModCost<Index>() + TensorOpCost::DivCost<Index>())); | ||||
|     return m_orig_impl.costPerCoeff(vectorized) + | ||||
|            m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, compute_cost); | ||||
|   } | ||||
|  | ||||
|  private: | ||||
|   EIGEN_DEVICE_FUNC void gen_strides(const InputDimensions& dims, StrideDims& strides) { | ||||
|     if (m_return_dim < 0) { | ||||
|       return;  // Won't be using the strides. | ||||
|     } | ||||
|     eigen_assert(m_return_dim < NumDims && | ||||
|                  "Asking to convert index to a dimension outside of the rank"); | ||||
|  | ||||
|     // Calculate m_stride_div and m_stride_mod, which are used to | ||||
|     // calculate the value of an index w.r.t. the m_return_dim. | ||||
|     if (Layout == static_cast<int>(ColMajor)) { | ||||
|       strides[0] = 1; | ||||
|       for (int i = 1; i < NumDims; ++i) { | ||||
|         strides[i] = strides[i-1] * dims[i-1]; | ||||
|       } | ||||
|     } else { | ||||
|       strides[NumDims-1] = 1; | ||||
|       for (int i = NumDims - 2; i >= 0; --i) { | ||||
|         strides[i] = strides[i+1] * dims[i+1]; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|  protected: | ||||
|   TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device> m_orig_impl; | ||||
|   TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexTupleOp<ArgType> >, Device> m_impl; | ||||
|   const int m_return_dim; | ||||
|   StrideDims m_strides; | ||||
|   Index m_stride_mod; | ||||
|   Index m_stride_div; | ||||
| }; | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H | ||||
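
A small sketch of how the tuple-reducer machinery above is normally reached through the public API: argmax() on TensorBase maps each coefficient to an (index, value) tuple and reduces it back to a tensor of indices. This is not part of the vendored file and assumes C++11 initializer-list support:

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<float, 2> t(2, 3);
  t.setValues({{1.f, 7.f, 2.f},
               {5.f, 0.f, 6.f}});

  // Global argmax: a rank-0 tensor holding the linearized position of the
  // maximum coefficient (the return_dim < 0 path in the evaluator above).
  Eigen::Tensor<Eigen::DenseIndex, 0> flat = t.argmax();

  // Argmax along dimension 0: one index per remaining column.
  Eigen::Tensor<Eigen::DenseIndex, 1> per_col = t.argmax(0);

  std::cout << "flat: " << flat() << ", per column: "
            << per_col(0) << " " << per_col(1) << " " << per_col(2) << "\n";
  return 0;
}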
							
								
								
									
181
external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
vendored
Normal file
							| @@ -0,0 +1,181 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \class TensorAssign | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief The tensor assignment class. | ||||
|   * | ||||
|   * This class represents the assignment of the values resulting from the evaluation of | ||||
|   * the rhs expression to the memory locations denoted by the lhs expression. | ||||
|   */ | ||||
| namespace internal { | ||||
| template<typename LhsXprType, typename RhsXprType> | ||||
| struct traits<TensorAssignOp<LhsXprType, RhsXprType> > | ||||
| { | ||||
|   typedef typename LhsXprType::Scalar Scalar; | ||||
|   typedef typename traits<LhsXprType>::StorageKind StorageKind; | ||||
|   typedef typename promote_index_type<typename traits<LhsXprType>::Index, | ||||
|                                       typename traits<RhsXprType>::Index>::type Index; | ||||
|   typedef typename LhsXprType::Nested LhsNested; | ||||
|   typedef typename RhsXprType::Nested RhsNested; | ||||
|   typedef typename remove_reference<LhsNested>::type _LhsNested; | ||||
|   typedef typename remove_reference<RhsNested>::type _RhsNested; | ||||
|   static const std::size_t NumDimensions = internal::traits<LhsXprType>::NumDimensions; | ||||
|   static const int Layout = internal::traits<LhsXprType>::Layout; | ||||
|  | ||||
|   enum { | ||||
|     Flags = 0 | ||||
|   }; | ||||
| }; | ||||
|  | ||||
| template<typename LhsXprType, typename RhsXprType> | ||||
| struct eval<TensorAssignOp<LhsXprType, RhsXprType>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorAssignOp<LhsXprType, RhsXprType>& type; | ||||
| }; | ||||
|  | ||||
| template<typename LhsXprType, typename RhsXprType> | ||||
| struct nested<TensorAssignOp<LhsXprType, RhsXprType>, 1, typename eval<TensorAssignOp<LhsXprType, RhsXprType> >::type> | ||||
| { | ||||
|   typedef TensorAssignOp<LhsXprType, RhsXprType> type; | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
|  | ||||
|  | ||||
| template<typename LhsXprType, typename RhsXprType> | ||||
| class TensorAssignOp : public TensorBase<TensorAssignOp<LhsXprType, RhsXprType> > | ||||
| { | ||||
|   public: | ||||
|   typedef typename Eigen::internal::traits<TensorAssignOp>::Scalar Scalar; | ||||
|   typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; | ||||
|   typedef typename LhsXprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename Eigen::internal::nested<TensorAssignOp>::type Nested; | ||||
|   typedef typename Eigen::internal::traits<TensorAssignOp>::StorageKind StorageKind; | ||||
|   typedef typename Eigen::internal::traits<TensorAssignOp>::Index Index; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorAssignOp(LhsXprType& lhs, const RhsXprType& rhs) | ||||
|       : m_lhs_xpr(lhs), m_rhs_xpr(rhs) {} | ||||
|  | ||||
|     /** \returns the nested expressions */ | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     typename internal::remove_all<typename LhsXprType::Nested>::type& | ||||
|     lhsExpression() const { return *((typename internal::remove_all<typename LhsXprType::Nested>::type*)&m_lhs_xpr); } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const typename internal::remove_all<typename RhsXprType::Nested>::type& | ||||
|     rhsExpression() const { return m_rhs_xpr; } | ||||
|  | ||||
|   protected: | ||||
|     typename internal::remove_all<typename LhsXprType::Nested>::type& m_lhs_xpr; | ||||
|     const typename internal::remove_all<typename RhsXprType::Nested>::type& m_rhs_xpr; | ||||
| }; | ||||
|  | ||||
|  | ||||
| template<typename LeftArgType, typename RightArgType, typename Device> | ||||
| struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device> | ||||
| { | ||||
|   typedef TensorAssignOp<LeftArgType, RightArgType> XprType; | ||||
|   typedef typename XprType::Index Index; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   typedef typename TensorEvaluator<RightArgType, Device>::Dimensions Dimensions; | ||||
|   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = TensorEvaluator<LeftArgType, Device>::IsAligned & TensorEvaluator<RightArgType, Device>::IsAligned, | ||||
|     PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & TensorEvaluator<RightArgType, Device>::PacketAccess, | ||||
|     Layout = TensorEvaluator<LeftArgType, Device>::Layout, | ||||
|     RawAccess = TensorEvaluator<LeftArgType, Device>::RawAccess | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) : | ||||
|       m_leftImpl(op.lhsExpression(), device), | ||||
|       m_rightImpl(op.rhsExpression(), device) | ||||
|   { | ||||
|     EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC const Dimensions& dimensions() const | ||||
|   { | ||||
|     // The dimensions of the lhs and the rhs tensors should be equal to prevent | ||||
|     // overflows and ensure the result is fully initialized. | ||||
|     // TODO: use left impl instead if right impl dimensions are known at compile time. | ||||
|     return m_rightImpl.dimensions(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { | ||||
|     eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions())); | ||||
|     m_leftImpl.evalSubExprsIfNeeded(NULL); | ||||
|     // If the lhs provides raw access to its storage area (i.e. if m_leftImpl.data() returns a non | ||||
|     // null value), attempt to evaluate the rhs expression in place. Returns true iff in place | ||||
|     // evaluation isn't supported and the caller still needs to manually assign the values generated | ||||
|     // by the rhs to the lhs. | ||||
|     return m_rightImpl.evalSubExprsIfNeeded(m_leftImpl.data()); | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { | ||||
|     m_leftImpl.cleanup(); | ||||
|     m_rightImpl.cleanup(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalScalar(Index i) { | ||||
|     m_leftImpl.coeffRef(i) = m_rightImpl.coeff(i); | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalPacket(Index i) { | ||||
|     const int LhsStoreMode = TensorEvaluator<LeftArgType, Device>::IsAligned ? Aligned : Unaligned; | ||||
|     const int RhsLoadMode = TensorEvaluator<RightArgType, Device>::IsAligned ? Aligned : Unaligned; | ||||
|     m_leftImpl.template writePacket<LhsStoreMode>(i, m_rightImpl.template packet<RhsLoadMode>(i)); | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const | ||||
|   { | ||||
|     return m_leftImpl.coeff(index); | ||||
|   } | ||||
|   template<int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const | ||||
|   { | ||||
|     return m_leftImpl.template packet<LoadMode>(index); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost | ||||
|   costPerCoeff(bool vectorized) const { | ||||
|     // We assume that evalPacket or evalScalar is called to perform the | ||||
|     // assignment and account for the cost of the write here, but reduce left | ||||
|     // cost by one load because we are using m_leftImpl.coeffRef. | ||||
|     TensorOpCost left = m_leftImpl.costPerCoeff(vectorized); | ||||
|     return m_rightImpl.costPerCoeff(vectorized) + | ||||
|            TensorOpCost( | ||||
|                numext::maxi(0.0, left.bytes_loaded() - sizeof(CoeffReturnType)), | ||||
|                left.bytes_stored(), left.compute_cycles()) + | ||||
|            TensorOpCost(0, sizeof(CoeffReturnType), 0, vectorized, PacketSize); | ||||
|   } | ||||
|  | ||||
|   /// required by sycl in order to extract the accessor | ||||
|   const TensorEvaluator<LeftArgType, Device>& left_impl() const { return m_leftImpl; } | ||||
|   /// required by sycl in order to extract the accessor | ||||
|   const TensorEvaluator<RightArgType, Device>& right_impl() const { return m_rightImpl; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_leftImpl.data(); } | ||||
|  | ||||
|  private: | ||||
|   TensorEvaluator<LeftArgType, Device> m_leftImpl; | ||||
|   TensorEvaluator<RightArgType, Device> m_rightImpl; | ||||
| }; | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H | ||||
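
A small sketch of the code paths that create this assign expression in practice: plain operator= uses the DefaultDevice, while device(...) routes the same kind of TensorAssignOp through the executor for that device. Not part of the vendored file; it assumes the threaded executor is enabled via EIGEN_USE_THREADS:

#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  Eigen::Tensor<float, 2> a(256, 256), b(256, 256), out(256, 256);
  a.setRandom();
  b.setRandom();

  // operator= builds an assign expression and runs it on the DefaultDevice.
  out = a + b;

  // device(...) runs the equivalent assign expression on a thread pool.
  Eigen::ThreadPool pool(4);
  Eigen::ThreadPoolDevice device(&pool, 4);
  out.device(device) = a * b;

  return 0;
}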
							
								
								
									
1012
external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
vendored
Normal file
File diff suppressed because it is too large

392
external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
vendored
Normal file
							| @@ -0,0 +1,392 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \class TensorBroadcasting | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Tensor broadcasting class. | ||||
|   * | ||||
|   * | ||||
|   */ | ||||
| namespace internal { | ||||
| template<typename Broadcast, typename XprType> | ||||
| struct traits<TensorBroadcastingOp<Broadcast, XprType> > : public traits<XprType> | ||||
| { | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef traits<XprType> XprTraits; | ||||
|   typedef typename XprTraits::StorageKind StorageKind; | ||||
|   typedef typename XprTraits::Index Index; | ||||
|   typedef typename XprType::Nested Nested; | ||||
|   typedef typename remove_reference<Nested>::type _Nested; | ||||
|   static const int NumDimensions = XprTraits::NumDimensions; | ||||
|   static const int Layout = XprTraits::Layout; | ||||
| }; | ||||
|  | ||||
| template<typename Broadcast, typename XprType> | ||||
| struct eval<TensorBroadcastingOp<Broadcast, XprType>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorBroadcastingOp<Broadcast, XprType>& type; | ||||
| }; | ||||
|  | ||||
| template<typename Broadcast, typename XprType> | ||||
| struct nested<TensorBroadcastingOp<Broadcast, XprType>, 1, typename eval<TensorBroadcastingOp<Broadcast, XprType> >::type> | ||||
| { | ||||
|   typedef TensorBroadcastingOp<Broadcast, XprType> type; | ||||
| }; | ||||
|  | ||||
| template <typename Dims> | ||||
| struct is_input_scalar { | ||||
|   static const bool value = false; | ||||
| }; | ||||
| template <> | ||||
| struct is_input_scalar<Sizes<> > { | ||||
|   static const bool value = true; | ||||
| }; | ||||
| #ifndef EIGEN_EMULATE_CXX11_META_H | ||||
| template <typename std::size_t... Indices> | ||||
| struct is_input_scalar<Sizes<Indices...> > { | ||||
|   static const bool value = (Sizes<Indices...>::total_size == 1); | ||||
| }; | ||||
| #endif | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
|  | ||||
|  | ||||
| template<typename Broadcast, typename XprType> | ||||
| class TensorBroadcastingOp : public TensorBase<TensorBroadcastingOp<Broadcast, XprType>, ReadOnlyAccessors> | ||||
| { | ||||
|   public: | ||||
|   typedef typename Eigen::internal::traits<TensorBroadcastingOp>::Scalar Scalar; | ||||
|   typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename Eigen::internal::nested<TensorBroadcastingOp>::type Nested; | ||||
|   typedef typename Eigen::internal::traits<TensorBroadcastingOp>::StorageKind StorageKind; | ||||
|   typedef typename Eigen::internal::traits<TensorBroadcastingOp>::Index Index; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBroadcastingOp(const XprType& expr, const Broadcast& broadcast) | ||||
|       : m_xpr(expr), m_broadcast(broadcast) {} | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const Broadcast& broadcast() const { return m_broadcast; } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const typename internal::remove_all<typename XprType::Nested>::type& | ||||
|     expression() const { return m_xpr; } | ||||
|  | ||||
|   protected: | ||||
|     typename XprType::Nested m_xpr; | ||||
|     const Broadcast m_broadcast; | ||||
| }; | ||||
|  | ||||
|  | ||||
| // Eval as rvalue | ||||
| template<typename Broadcast, typename ArgType, typename Device> | ||||
| struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device> | ||||
| { | ||||
|   typedef TensorBroadcastingOp<Broadcast, ArgType> XprType; | ||||
|   typedef typename XprType::Index Index; | ||||
|   static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; | ||||
|   typedef DSizes<Index, NumDims> Dimensions; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = true, | ||||
|     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, | ||||
|     Layout = TensorEvaluator<ArgType, Device>::Layout, | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) | ||||
|     : m_broadcast(op.broadcast()),m_impl(op.expression(), device) | ||||
|   { | ||||
|     // The broadcasting op doesn't change the rank of the tensor. One can't broadcast a scalar | ||||
|     // and store the result in a scalar. Instead one should reshape the scalar into an N-D | ||||
|     // tensor (N >= 1) with a single element first and then broadcast. | ||||
|     EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|     const InputDimensions& input_dims = m_impl.dimensions(); | ||||
|     const Broadcast& broadcast = op.broadcast(); | ||||
|     for (int i = 0; i < NumDims; ++i) { | ||||
|       eigen_assert(input_dims[i] > 0); | ||||
|       m_dimensions[i] = input_dims[i] * broadcast[i]; | ||||
|     } | ||||
|  | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       m_inputStrides[0] = 1; | ||||
|       m_outputStrides[0] = 1; | ||||
|       for (int i = 1; i < NumDims; ++i) { | ||||
|         m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; | ||||
|         m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; | ||||
|       } | ||||
|     } else { | ||||
|       m_inputStrides[NumDims-1] = 1; | ||||
|       m_outputStrides[NumDims-1] = 1; | ||||
|       for (int i = NumDims-2; i >= 0; --i) { | ||||
|         m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; | ||||
|         m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1]; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { | ||||
|     m_impl.evalSubExprsIfNeeded(NULL); | ||||
|     return true; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { | ||||
|     m_impl.cleanup(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const | ||||
|   { | ||||
|     if (internal::is_input_scalar<typename internal::remove_all<InputDimensions>::type>::value) { | ||||
|       return m_impl.coeff(0); | ||||
|     } | ||||
|  | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       return coeffColMajor(index); | ||||
|     } else { | ||||
|       return coeffRowMajor(index); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   // TODO: attempt to speed this up. The integer divisions and modulo are slow | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffColMajor(Index index) const | ||||
|   { | ||||
|     Index inputIndex = 0; | ||||
|     for (int i = NumDims - 1; i > 0; --i) { | ||||
|       const Index idx = index / m_outputStrides[i]; | ||||
|       if (internal::index_statically_eq<Broadcast>(i, 1)) { | ||||
|         eigen_assert(idx < m_impl.dimensions()[i]); | ||||
|         inputIndex += idx * m_inputStrides[i]; | ||||
|       } else { | ||||
|         if (internal::index_statically_eq<InputDimensions>(i, 1)) { | ||||
|           eigen_assert(idx % m_impl.dimensions()[i] == 0); | ||||
|         } else { | ||||
|           inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; | ||||
|         } | ||||
|       } | ||||
|       index -= idx * m_outputStrides[i]; | ||||
|     } | ||||
|     if (internal::index_statically_eq<Broadcast>(0, 1)) { | ||||
|       eigen_assert(index < m_impl.dimensions()[0]); | ||||
|       inputIndex += index; | ||||
|     } else { | ||||
|       if (internal::index_statically_eq<InputDimensions>(0, 1)) { | ||||
|         eigen_assert(index % m_impl.dimensions()[0] == 0); | ||||
|       } else { | ||||
|         inputIndex += (index % m_impl.dimensions()[0]); | ||||
|       } | ||||
|     } | ||||
|     return m_impl.coeff(inputIndex); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffRowMajor(Index index) const | ||||
|   { | ||||
|     Index inputIndex = 0; | ||||
|     for (int i = 0; i < NumDims - 1; ++i) { | ||||
|       const Index idx = index / m_outputStrides[i]; | ||||
|       if (internal::index_statically_eq<Broadcast>(i, 1)) { | ||||
|         eigen_assert(idx < m_impl.dimensions()[i]); | ||||
|         inputIndex += idx * m_inputStrides[i]; | ||||
|       } else { | ||||
|         if (internal::index_statically_eq<InputDimensions>(i, 1)) { | ||||
|           eigen_assert(idx % m_impl.dimensions()[i] == 0); | ||||
|         } else { | ||||
|           inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; | ||||
|         } | ||||
|       } | ||||
|       index -= idx * m_outputStrides[i]; | ||||
|     } | ||||
|     if (internal::index_statically_eq<Broadcast>(NumDims-1, 1)) { | ||||
|       eigen_assert(index < m_impl.dimensions()[NumDims-1]); | ||||
|       inputIndex += index; | ||||
|     } else { | ||||
|       if (internal::index_statically_eq<InputDimensions>(NumDims-1, 1)) { | ||||
|         eigen_assert(index % m_impl.dimensions()[NumDims-1] == 0); | ||||
|       } else { | ||||
|         inputIndex += (index % m_impl.dimensions()[NumDims-1]); | ||||
|       } | ||||
|     } | ||||
|     return m_impl.coeff(inputIndex); | ||||
|   } | ||||
|  | ||||
|   template<int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packet(Index index) const | ||||
|   { | ||||
|     if (internal::is_input_scalar<typename internal::remove_all<InputDimensions>::type>::value) { | ||||
|       return internal::pset1<PacketReturnType>(m_impl.coeff(0)); | ||||
|     } | ||||
|  | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       return packetColMajor<LoadMode>(index); | ||||
|     } else { | ||||
|       return packetRowMajor<LoadMode>(index); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   // Ignore the LoadMode and always use unaligned loads since we can't guarantee | ||||
|   // the alignment at compile time. | ||||
|   template<int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const | ||||
|   { | ||||
|     EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); | ||||
|  | ||||
|     const Index originalIndex = index; | ||||
|  | ||||
|     Index inputIndex = 0; | ||||
|     for (int i = NumDims - 1; i > 0; --i) { | ||||
|       const Index idx = index / m_outputStrides[i]; | ||||
|       if (internal::index_statically_eq<Broadcast>(i, 1)) { | ||||
|         eigen_assert(idx < m_impl.dimensions()[i]); | ||||
|         inputIndex += idx * m_inputStrides[i]; | ||||
|       } else { | ||||
|         if (internal::index_statically_eq<InputDimensions>(i, 1)) { | ||||
|           eigen_assert(idx % m_impl.dimensions()[i] == 0); | ||||
|         } else { | ||||
|           inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; | ||||
|         } | ||||
|       } | ||||
|       index -= idx * m_outputStrides[i]; | ||||
|     } | ||||
|     Index innermostLoc; | ||||
|     if (internal::index_statically_eq<Broadcast>(0, 1)) { | ||||
|       eigen_assert(index < m_impl.dimensions()[0]); | ||||
|       innermostLoc = index; | ||||
|     } else { | ||||
|       if (internal::index_statically_eq<InputDimensions>(0, 1)) { | ||||
|         eigen_assert(index % m_impl.dimensions()[0] == 0); | ||||
|         innermostLoc = 0; | ||||
|       } else { | ||||
|         innermostLoc = index % m_impl.dimensions()[0]; | ||||
|       } | ||||
|     } | ||||
|     inputIndex += innermostLoc; | ||||
|  | ||||
|     // TODO: this could be extended to the second dimension if we're not | ||||
|     // broadcasting along the first dimension, and so on. | ||||
|     if (innermostLoc + PacketSize <= m_impl.dimensions()[0]) { | ||||
|       return m_impl.template packet<Unaligned>(inputIndex); | ||||
|     } else { | ||||
|       EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; | ||||
|       values[0] = m_impl.coeff(inputIndex); | ||||
|       for (int i = 1; i < PacketSize; ++i) { | ||||
|         values[i] = coeffColMajor(originalIndex+i); | ||||
|       } | ||||
|       PacketReturnType rslt = internal::pload<PacketReturnType>(values); | ||||
|       return rslt; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   template<int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const | ||||
|   { | ||||
|     EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); | ||||
|  | ||||
|     const Index originalIndex = index; | ||||
|  | ||||
|     Index inputIndex = 0; | ||||
|     for (int i = 0; i < NumDims - 1; ++i) { | ||||
|       const Index idx = index / m_outputStrides[i]; | ||||
|       if (internal::index_statically_eq<Broadcast>(i, 1)) { | ||||
|         eigen_assert(idx < m_impl.dimensions()[i]); | ||||
|         inputIndex += idx * m_inputStrides[i]; | ||||
|       } else { | ||||
|         if (internal::index_statically_eq<InputDimensions>(i, 1)) { | ||||
|           eigen_assert(idx % m_impl.dimensions()[i] == 0); | ||||
|         } else { | ||||
|           inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; | ||||
|         } | ||||
|       } | ||||
|       index -= idx * m_outputStrides[i]; | ||||
|     } | ||||
|     Index innermostLoc; | ||||
|     if (internal::index_statically_eq<Broadcast>(NumDims-1, 1)) { | ||||
|       eigen_assert(index < m_impl.dimensions()[NumDims-1]); | ||||
|       innermostLoc = index; | ||||
|     } else { | ||||
|       if (internal::index_statically_eq<InputDimensions>(NumDims-1, 1)) { | ||||
|         eigen_assert(index % m_impl.dimensions()[NumDims-1] == 0); | ||||
|         innermostLoc = 0; | ||||
|       } else { | ||||
|         innermostLoc = index % m_impl.dimensions()[NumDims-1]; | ||||
|       } | ||||
|     } | ||||
|     inputIndex += innermostLoc; | ||||
|  | ||||
|     // TODO: this could be extended to the second dimension if we're not | ||||
|     // broadcasting along the first dimension, and so on. | ||||
|     if (innermostLoc + PacketSize <= m_impl.dimensions()[NumDims-1]) { | ||||
|       return m_impl.template packet<Unaligned>(inputIndex); | ||||
|     } else { | ||||
|       EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; | ||||
|       values[0] = m_impl.coeff(inputIndex); | ||||
|       for (int i = 1; i < PacketSize; ++i) { | ||||
|         values[i] = coeffRowMajor(originalIndex+i); | ||||
|       } | ||||
|       PacketReturnType rslt = internal::pload<PacketReturnType>(values); | ||||
|       return rslt; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost | ||||
|   costPerCoeff(bool vectorized) const { | ||||
|     double compute_cost = TensorOpCost::AddCost<Index>(); | ||||
|     if (NumDims > 0) { | ||||
|       for (int i = NumDims - 1; i > 0; --i) { | ||||
|         compute_cost += TensorOpCost::DivCost<Index>(); | ||||
|         if (internal::index_statically_eq<Broadcast>(i, 1)) { | ||||
|           compute_cost += | ||||
|               TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>(); | ||||
|         } else { | ||||
|           if (!internal::index_statically_eq<InputDimensions>(i, 1)) { | ||||
|             compute_cost += TensorOpCost::MulCost<Index>() + | ||||
|                             TensorOpCost::ModCost<Index>() + | ||||
|                             TensorOpCost::AddCost<Index>(); | ||||
|           } | ||||
|         } | ||||
|         compute_cost += | ||||
|             TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>(); | ||||
|       } | ||||
|     } | ||||
|     return m_impl.costPerCoeff(vectorized) + | ||||
|            TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } | ||||
|  | ||||
|   const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; } | ||||
|  | ||||
|   Broadcast functor() const { return m_broadcast; } | ||||
|  | ||||
|  protected: | ||||
|   const Broadcast m_broadcast; | ||||
|   Dimensions m_dimensions; | ||||
|   array<Index, NumDims> m_outputStrides; | ||||
|   array<Index, NumDims> m_inputStrides; | ||||
|   TensorEvaluator<ArgType, Device> m_impl; | ||||
| }; | ||||
|  | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H | ||||
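
A small sketch of the broadcast() call this evaluator serves: each input dimension is tiled by the matching broadcast factor, so the output dimension is input_dims[i] * broadcast[i]. Not part of the vendored file:

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<int, 2> t(2, 3);
  t.setValues({{1, 2, 3},
               {4, 5, 6}});

  // Tile twice along dimension 0 and three times along dimension 1.
  Eigen::array<Eigen::DenseIndex, 2> bcast{{2, 3}};
  Eigen::Tensor<int, 2> tiled = t.broadcast(bcast);

  // 2*2 x 3*3 = 4 x 9 output.
  std::cout << tiled.dimension(0) << " x " << tiled.dimension(1) << "\n";
  return 0;
}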
							
								
								
									
384
external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
vendored
Normal file
							| @@ -0,0 +1,384 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \class TensorChippingOp | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief A chip is a thin slice, corresponding to a column or a row in a 2-d tensor. | ||||
|   * | ||||
|   * | ||||
|   */ | ||||
|  | ||||
| namespace internal { | ||||
| template<DenseIndex DimId, typename XprType> | ||||
| struct traits<TensorChippingOp<DimId, XprType> > : public traits<XprType> | ||||
| { | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef traits<XprType> XprTraits; | ||||
|   typedef typename XprTraits::StorageKind StorageKind; | ||||
|   typedef typename XprTraits::Index Index; | ||||
|   typedef typename XprType::Nested Nested; | ||||
|   typedef typename remove_reference<Nested>::type _Nested; | ||||
|   static const int NumDimensions = XprTraits::NumDimensions - 1; | ||||
|   static const int Layout = XprTraits::Layout; | ||||
| }; | ||||
|  | ||||
| template<DenseIndex DimId, typename XprType> | ||||
| struct eval<TensorChippingOp<DimId, XprType>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorChippingOp<DimId, XprType>& type; | ||||
| }; | ||||
|  | ||||
| template<DenseIndex DimId, typename XprType> | ||||
| struct nested<TensorChippingOp<DimId, XprType>, 1, typename eval<TensorChippingOp<DimId, XprType> >::type> | ||||
| { | ||||
|   typedef TensorChippingOp<DimId, XprType> type; | ||||
| }; | ||||
|  | ||||
| template <DenseIndex DimId> | ||||
| struct DimensionId | ||||
| { | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) { | ||||
|     eigen_assert(dim == DimId); | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const { | ||||
|     return DimId; | ||||
|   } | ||||
| }; | ||||
| template <> | ||||
| struct DimensionId<Dynamic> | ||||
| { | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) : actual_dim(dim) { | ||||
|     eigen_assert(dim >= 0); | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const { | ||||
|     return actual_dim; | ||||
|   } | ||||
|  private: | ||||
|   const DenseIndex actual_dim; | ||||
| }; | ||||
|  | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
|  | ||||
|  | ||||
| template<DenseIndex DimId, typename XprType> | ||||
| class TensorChippingOp : public TensorBase<TensorChippingOp<DimId, XprType> > | ||||
| { | ||||
|   public: | ||||
|   typedef typename Eigen::internal::traits<TensorChippingOp>::Scalar Scalar; | ||||
|   typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename Eigen::internal::nested<TensorChippingOp>::type Nested; | ||||
|   typedef typename Eigen::internal::traits<TensorChippingOp>::StorageKind StorageKind; | ||||
|   typedef typename Eigen::internal::traits<TensorChippingOp>::Index Index; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorChippingOp(const XprType& expr, const Index offset, const Index dim) | ||||
|       : m_xpr(expr), m_offset(offset), m_dim(dim) { | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   const Index offset() const { return m_offset; } | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   const Index dim() const { return m_dim.actualDim(); } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   const typename internal::remove_all<typename XprType::Nested>::type& | ||||
|   expression() const { return m_xpr; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE TensorChippingOp& operator = (const TensorChippingOp& other) | ||||
|   { | ||||
|     typedef TensorAssignOp<TensorChippingOp, const TensorChippingOp> Assign; | ||||
|     Assign assign(*this, other); | ||||
|     internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); | ||||
|     return *this; | ||||
|   } | ||||
|  | ||||
|   template<typename OtherDerived> | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE TensorChippingOp& operator = (const OtherDerived& other) | ||||
|   { | ||||
|     typedef TensorAssignOp<TensorChippingOp, const OtherDerived> Assign; | ||||
|     Assign assign(*this, other); | ||||
|     internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); | ||||
|     return *this; | ||||
|   } | ||||
|  | ||||
|   protected: | ||||
|     typename XprType::Nested m_xpr; | ||||
|     const Index m_offset; | ||||
|     const internal::DimensionId<DimId> m_dim; | ||||
| }; | ||||
|  | ||||
|  | ||||
| // Eval as rvalue | ||||
| template<DenseIndex DimId, typename ArgType, typename Device> | ||||
| struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device> | ||||
| { | ||||
|   typedef TensorChippingOp<DimId, ArgType> XprType; | ||||
|   static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; | ||||
|   static const int NumDims = NumInputDims-1; | ||||
|   typedef typename XprType::Index Index; | ||||
|   typedef DSizes<Index, NumDims> Dimensions; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|  | ||||
|  | ||||
|   enum { | ||||
|     // Alignment can't be guaranteed at compile time since it depends on the | ||||
|     // slice offsets. | ||||
|     IsAligned = false, | ||||
|     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, | ||||
|     Layout = TensorEvaluator<ArgType, Device>::Layout, | ||||
|     CoordAccess = false,  // to be implemented | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) | ||||
|       : m_impl(op.expression(), device), m_dim(op.dim()), m_device(device) | ||||
|   { | ||||
|     EIGEN_STATIC_ASSERT((NumInputDims >= 1), YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|     eigen_assert(NumInputDims > m_dim.actualDim()); | ||||
|  | ||||
|     const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); | ||||
|     eigen_assert(op.offset() < input_dims[m_dim.actualDim()]); | ||||
|  | ||||
|     int j = 0; | ||||
|     for (int i = 0; i < NumInputDims; ++i) { | ||||
|       if (i != m_dim.actualDim()) { | ||||
|         m_dimensions[j] = input_dims[i]; | ||||
|         ++j; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     m_stride = 1; | ||||
|     m_inputStride = 1; | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       for (int i = 0; i < m_dim.actualDim(); ++i) { | ||||
|         m_stride *= input_dims[i]; | ||||
|         m_inputStride *= input_dims[i]; | ||||
|       } | ||||
|     } else { | ||||
|       for (int i = NumInputDims-1; i > m_dim.actualDim(); --i) { | ||||
|         m_stride *= input_dims[i]; | ||||
|         m_inputStride *= input_dims[i]; | ||||
|       } | ||||
|     } | ||||
|     m_inputStride *= input_dims[m_dim.actualDim()]; | ||||
|     m_inputOffset = m_stride * op.offset(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { | ||||
|     m_impl.evalSubExprsIfNeeded(NULL); | ||||
|     return true; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { | ||||
|     m_impl.cleanup(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const | ||||
|   { | ||||
|     return m_impl.coeff(srcCoeff(index)); | ||||
|   } | ||||
|  | ||||
|   template<int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const | ||||
|   { | ||||
|     EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); | ||||
|  | ||||
|     if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == 0) || | ||||
| 	(static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == NumInputDims-1)) { | ||||
|       // m_stride is equal to 1, so let's avoid the integer division. | ||||
|       eigen_assert(m_stride == 1); | ||||
|       Index inputIndex = index * m_inputStride + m_inputOffset; | ||||
|       EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; | ||||
|       for (int i = 0; i < PacketSize; ++i) { | ||||
|         values[i] = m_impl.coeff(inputIndex); | ||||
|         inputIndex += m_inputStride; | ||||
|       } | ||||
|       PacketReturnType rslt = internal::pload<PacketReturnType>(values); | ||||
|       return rslt; | ||||
|     } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumInputDims - 1) || | ||||
| 	       (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) { | ||||
|       // m_stride is always greater than index, so let's avoid the integer division. | ||||
|       eigen_assert(m_stride > index); | ||||
|       return m_impl.template packet<LoadMode>(index + m_inputOffset); | ||||
|     } else { | ||||
|       const Index idx = index / m_stride; | ||||
|       const Index rem = index - idx * m_stride; | ||||
|       if (rem + PacketSize <= m_stride) { | ||||
|         Index inputIndex = idx * m_inputStride + m_inputOffset + rem; | ||||
|         return m_impl.template packet<LoadMode>(inputIndex); | ||||
|       } else { | ||||
|         // Cross the stride boundary. Fallback to slow path. | ||||
|         EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; | ||||
|         for (int i = 0; i < PacketSize; ++i) { | ||||
|           values[i] = coeff(index); | ||||
|           ++index; | ||||
|         } | ||||
|         PacketReturnType rslt = internal::pload<PacketReturnType>(values); | ||||
|         return rslt; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost | ||||
|   costPerCoeff(bool vectorized) const { | ||||
|     double cost = 0; | ||||
|     if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && | ||||
|          m_dim.actualDim() == 0) || | ||||
|         (static_cast<int>(Layout) == static_cast<int>(RowMajor) && | ||||
|          m_dim.actualDim() == NumInputDims - 1)) { | ||||
|       cost += TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>(); | ||||
|     } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && | ||||
|                 m_dim.actualDim() == NumInputDims - 1) || | ||||
|                (static_cast<int>(Layout) == static_cast<int>(RowMajor) && | ||||
|                 m_dim.actualDim() == 0)) { | ||||
|       cost += TensorOpCost::AddCost<Index>(); | ||||
|     } else { | ||||
|       cost += 3 * TensorOpCost::MulCost<Index>() + TensorOpCost::DivCost<Index>() + | ||||
|               3 * TensorOpCost::AddCost<Index>(); | ||||
|     } | ||||
|  | ||||
|     return m_impl.costPerCoeff(vectorized) + | ||||
|            TensorOpCost(0, 0, cost, vectorized, PacketSize); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const { | ||||
|     CoeffReturnType* result = const_cast<CoeffReturnType*>(m_impl.data()); | ||||
|     if (((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumDims) || | ||||
|          (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) && | ||||
|         result) { | ||||
|       return result + m_inputOffset; | ||||
|     } else { | ||||
|       return NULL; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|  protected: | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const | ||||
|   { | ||||
|     Index inputIndex; | ||||
|     if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == 0) || | ||||
| 	(static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == NumInputDims-1)) { | ||||
|       // m_stride is equal to 1, so let's avoid the integer division. | ||||
|       eigen_assert(m_stride == 1); | ||||
|       inputIndex = index * m_inputStride + m_inputOffset; | ||||
|     } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumInputDims-1) || | ||||
| 	       (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) { | ||||
|       // m_stride is always greater than index, so let's avoid the integer division. | ||||
|       eigen_assert(m_stride > index); | ||||
|       inputIndex = index + m_inputOffset; | ||||
|     } else { | ||||
|       const Index idx = index / m_stride; | ||||
|       inputIndex = idx * m_inputStride + m_inputOffset; | ||||
|       index -= idx * m_stride; | ||||
|       inputIndex += index; | ||||
|     } | ||||
|     return inputIndex; | ||||
|   } | ||||
|  | ||||
|   Dimensions m_dimensions; | ||||
|   Index m_stride; | ||||
|   Index m_inputOffset; | ||||
|   Index m_inputStride; | ||||
|   TensorEvaluator<ArgType, Device> m_impl; | ||||
|   const internal::DimensionId<DimId> m_dim; | ||||
|   const Device& m_device; | ||||
| }; | ||||
|  | ||||
|  | ||||
| // Eval as lvalue | ||||
| template<DenseIndex DimId, typename ArgType, typename Device> | ||||
| struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device> | ||||
|   : public TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device> | ||||
| { | ||||
|   typedef TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device> Base; | ||||
|   typedef TensorChippingOp<DimId, ArgType> XprType; | ||||
|   static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; | ||||
|   static const int NumDims = NumInputDims-1; | ||||
|   typedef typename XprType::Index Index; | ||||
|   typedef DSizes<Index, NumDims> Dimensions; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = false, | ||||
|     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) | ||||
|     : Base(op, device) | ||||
|     { } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) | ||||
|   { | ||||
|     return this->m_impl.coeffRef(this->srcCoeff(index)); | ||||
|   } | ||||
|  | ||||
|   template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   void writePacket(Index index, const PacketReturnType& x) | ||||
|   { | ||||
|     EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|  | ||||
|     if ((static_cast<int>(this->Layout) == static_cast<int>(ColMajor) && this->m_dim.actualDim() == 0) || | ||||
| 	(static_cast<int>(this->Layout) == static_cast<int>(RowMajor) && this->m_dim.actualDim() == NumInputDims-1)) { | ||||
|       // m_stride is equal to 1, so let's avoid the integer division. | ||||
|       eigen_assert(this->m_stride == 1); | ||||
|       EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; | ||||
|       internal::pstore<CoeffReturnType, PacketReturnType>(values, x); | ||||
|       Index inputIndex = index * this->m_inputStride + this->m_inputOffset; | ||||
|       for (int i = 0; i < PacketSize; ++i) { | ||||
|         this->m_impl.coeffRef(inputIndex) = values[i]; | ||||
|         inputIndex += this->m_inputStride; | ||||
|       } | ||||
|     } else if ((static_cast<int>(this->Layout) == static_cast<int>(ColMajor) && this->m_dim.actualDim() == NumInputDims-1) || | ||||
| 	       (static_cast<int>(this->Layout) == static_cast<int>(RowMajor) && this->m_dim.actualDim() == 0)) { | ||||
|       // m_stride is always greater than index, so let's avoid the integer division. | ||||
|       eigen_assert(this->m_stride > index); | ||||
|       this->m_impl.template writePacket<StoreMode>(index + this->m_inputOffset, x); | ||||
|     } else { | ||||
|       const Index idx = index / this->m_stride; | ||||
|       const Index rem = index - idx * this->m_stride; | ||||
|       if (rem + PacketSize <= this->m_stride) { | ||||
|         const Index inputIndex = idx * this->m_inputStride + this->m_inputOffset + rem; | ||||
|         this->m_impl.template writePacket<StoreMode>(inputIndex, x); | ||||
|       } else { | ||||
|         // Cross stride boundary. Fallback to slow path. | ||||
|         EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; | ||||
|         internal::pstore<CoeffReturnType, PacketReturnType>(values, x); | ||||
|         for (int i = 0; i < PacketSize; ++i) { | ||||
|           this->coeffRef(index) = values[i]; | ||||
|           ++index; | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H | ||||
							
								
								
									
361  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h  vendored  Normal file
							| @@ -0,0 +1,361 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \class TensorConcatenationOp | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Tensor concatenation class. | ||||
|   * | ||||
|   * | ||||
|   */ | ||||
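| // Usage sketch (illustrative; assumes <unsupported/Eigen/CXX11/Tensor> is included): | ||||
| // a concatenation is normally created through TensorBase::concatenate(), which | ||||
| // returns a TensorConcatenationOp expression joining two tensors along one axis. | ||||
| // All dimensions other than the concatenation axis must match. | ||||
| // | ||||
| //   Eigen::Tensor<float, 2> a(2, 3), b(4, 3); | ||||
| //   a.setRandom(); b.setRandom(); | ||||
| //   Eigen::Tensor<float, 2> c = a.concatenate(b, 0);  // c has dimensions (6, 3) | ||||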
| namespace internal { | ||||
| template<typename Axis, typename LhsXprType, typename RhsXprType> | ||||
| struct traits<TensorConcatenationOp<Axis, LhsXprType, RhsXprType> > | ||||
| { | ||||
|   // Type promotion to handle the case where the types of the lhs and the rhs are different. | ||||
|   typedef typename promote_storage_type<typename LhsXprType::Scalar, | ||||
|                                         typename RhsXprType::Scalar>::ret Scalar; | ||||
|   typedef typename promote_storage_type<typename traits<LhsXprType>::StorageKind, | ||||
|                                         typename traits<RhsXprType>::StorageKind>::ret StorageKind; | ||||
|   typedef typename promote_index_type<typename traits<LhsXprType>::Index, | ||||
|                                       typename traits<RhsXprType>::Index>::type Index; | ||||
|   typedef typename LhsXprType::Nested LhsNested; | ||||
|   typedef typename RhsXprType::Nested RhsNested; | ||||
|   typedef typename remove_reference<LhsNested>::type _LhsNested; | ||||
|   typedef typename remove_reference<RhsNested>::type _RhsNested; | ||||
|   static const int NumDimensions = traits<LhsXprType>::NumDimensions; | ||||
|   static const int Layout = traits<LhsXprType>::Layout; | ||||
|   enum { Flags = 0 }; | ||||
| }; | ||||
|  | ||||
| template<typename Axis, typename LhsXprType, typename RhsXprType> | ||||
| struct eval<TensorConcatenationOp<Axis, LhsXprType, RhsXprType>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorConcatenationOp<Axis, LhsXprType, RhsXprType>& type; | ||||
| }; | ||||
|  | ||||
| template<typename Axis, typename LhsXprType, typename RhsXprType> | ||||
| struct nested<TensorConcatenationOp<Axis, LhsXprType, RhsXprType>, 1, typename eval<TensorConcatenationOp<Axis, LhsXprType, RhsXprType> >::type> | ||||
| { | ||||
|   typedef TensorConcatenationOp<Axis, LhsXprType, RhsXprType> type; | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
|  | ||||
| template<typename Axis, typename LhsXprType, typename RhsXprType> | ||||
| class TensorConcatenationOp : public TensorBase<TensorConcatenationOp<Axis, LhsXprType, RhsXprType>, WriteAccessors> | ||||
| { | ||||
|   public: | ||||
|     typedef typename internal::traits<TensorConcatenationOp>::Scalar Scalar; | ||||
|     typedef typename internal::traits<TensorConcatenationOp>::StorageKind StorageKind; | ||||
|     typedef typename internal::traits<TensorConcatenationOp>::Index Index; | ||||
|     typedef typename internal::nested<TensorConcatenationOp>::type Nested; | ||||
|     typedef typename internal::promote_storage_type<typename LhsXprType::CoeffReturnType, | ||||
|                                                     typename RhsXprType::CoeffReturnType>::ret CoeffReturnType; | ||||
|     typedef typename NumTraits<Scalar>::Real RealScalar; | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConcatenationOp(const LhsXprType& lhs, const RhsXprType& rhs, Axis axis) | ||||
|         : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_axis(axis) {} | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const typename internal::remove_all<typename LhsXprType::Nested>::type& | ||||
|     lhsExpression() const { return m_lhs_xpr; } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const typename internal::remove_all<typename RhsXprType::Nested>::type& | ||||
|     rhsExpression() const { return m_rhs_xpr; } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC const Axis& axis() const { return m_axis; } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE TensorConcatenationOp& operator = (const TensorConcatenationOp& other) | ||||
|     { | ||||
|       typedef TensorAssignOp<TensorConcatenationOp, const TensorConcatenationOp> Assign; | ||||
|       Assign assign(*this, other); | ||||
|       internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); | ||||
|       return *this; | ||||
|     } | ||||
|  | ||||
|     template<typename OtherDerived> | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE TensorConcatenationOp& operator = (const OtherDerived& other) | ||||
|     { | ||||
|       typedef TensorAssignOp<TensorConcatenationOp, const OtherDerived> Assign; | ||||
|       Assign assign(*this, other); | ||||
|       internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); | ||||
|       return *this; | ||||
|     } | ||||
|  | ||||
|   protected: | ||||
|     typename LhsXprType::Nested m_lhs_xpr; | ||||
|     typename RhsXprType::Nested m_rhs_xpr; | ||||
|     const Axis m_axis; | ||||
| }; | ||||
|  | ||||
|  | ||||
| // Eval as rvalue | ||||
| template<typename Axis, typename LeftArgType, typename RightArgType, typename Device> | ||||
| struct TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgType>, Device> | ||||
| { | ||||
|   typedef TensorConcatenationOp<Axis, LeftArgType, RightArgType> XprType; | ||||
|   typedef typename XprType::Index Index; | ||||
|   static const int NumDims = internal::array_size<typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value; | ||||
|   static const int RightNumDims = internal::array_size<typename TensorEvaluator<RightArgType, Device>::Dimensions>::value; | ||||
|   typedef DSizes<Index, NumDims> Dimensions; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   enum { | ||||
|     IsAligned = false, | ||||
|     PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & TensorEvaluator<RightArgType, Device>::PacketAccess, | ||||
|     Layout = TensorEvaluator<LeftArgType, Device>::Layout, | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) | ||||
|     : m_leftImpl(op.lhsExpression(), device), m_rightImpl(op.rhsExpression(), device), m_axis(op.axis()) | ||||
|   { | ||||
|     EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout) || NumDims == 1), YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|     EIGEN_STATIC_ASSERT((NumDims == RightNumDims), YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|     EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|  | ||||
|     eigen_assert(0 <= m_axis && m_axis < NumDims); | ||||
|     const Dimensions& lhs_dims = m_leftImpl.dimensions(); | ||||
|     const Dimensions& rhs_dims = m_rightImpl.dimensions(); | ||||
|     { | ||||
|       int i = 0; | ||||
|       for (; i < m_axis; ++i) { | ||||
|         eigen_assert(lhs_dims[i] > 0); | ||||
|         eigen_assert(lhs_dims[i] == rhs_dims[i]); | ||||
|         m_dimensions[i] = lhs_dims[i]; | ||||
|       } | ||||
|       eigen_assert(lhs_dims[i] > 0);  // Now i == m_axis. | ||||
|       eigen_assert(rhs_dims[i] > 0); | ||||
|       m_dimensions[i] = lhs_dims[i] + rhs_dims[i]; | ||||
|       for (++i; i < NumDims; ++i) { | ||||
|         eigen_assert(lhs_dims[i] > 0); | ||||
|         eigen_assert(lhs_dims[i] == rhs_dims[i]); | ||||
|         m_dimensions[i] = lhs_dims[i]; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       m_leftStrides[0] = 1; | ||||
|       m_rightStrides[0] = 1; | ||||
|       m_outputStrides[0] = 1; | ||||
|  | ||||
|       for (int j = 1; j < NumDims; ++j) { | ||||
|         m_leftStrides[j] = m_leftStrides[j-1] * lhs_dims[j-1]; | ||||
|         m_rightStrides[j] = m_rightStrides[j-1] * rhs_dims[j-1]; | ||||
|         m_outputStrides[j] = m_outputStrides[j-1] * m_dimensions[j-1]; | ||||
|       } | ||||
|     } else { | ||||
|       m_leftStrides[NumDims - 1] = 1; | ||||
|       m_rightStrides[NumDims - 1] = 1; | ||||
|       m_outputStrides[NumDims - 1] = 1; | ||||
|  | ||||
|       for (int j = NumDims - 2; j >= 0; --j) { | ||||
|         m_leftStrides[j] = m_leftStrides[j+1] * lhs_dims[j+1]; | ||||
|         m_rightStrides[j] = m_rightStrides[j+1] * rhs_dims[j+1]; | ||||
|         m_outputStrides[j] = m_outputStrides[j+1] * m_dimensions[j+1]; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } | ||||
|  | ||||
|   // TODO(phli): Add short-circuit memcpy evaluation if underlying data are linear? | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) | ||||
|   { | ||||
|     m_leftImpl.evalSubExprsIfNeeded(NULL); | ||||
|     m_rightImpl.evalSubExprsIfNeeded(NULL); | ||||
|     return true; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() | ||||
|   { | ||||
|     m_leftImpl.cleanup(); | ||||
|     m_rightImpl.cleanup(); | ||||
|   } | ||||
|  | ||||
|   // TODO(phli): attempt to speed this up. The integer divisions and modulo are slow. | ||||
|   // See CL/76180724 comments for more ideas. | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const | ||||
|   { | ||||
|     // Collect dimension-wise indices (subs). | ||||
|     array<Index, NumDims> subs; | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       for (int i = NumDims - 1; i > 0; --i) { | ||||
|         subs[i] = index / m_outputStrides[i]; | ||||
|         index -= subs[i] * m_outputStrides[i]; | ||||
|       } | ||||
|       subs[0] = index; | ||||
|     } else { | ||||
|       for (int i = 0; i < NumDims - 1; ++i) { | ||||
|         subs[i] = index / m_outputStrides[i]; | ||||
|         index -= subs[i] * m_outputStrides[i]; | ||||
|       } | ||||
|       subs[NumDims - 1] = index; | ||||
|     } | ||||
|  | ||||
|     const Dimensions& left_dims = m_leftImpl.dimensions(); | ||||
|     if (subs[m_axis] < left_dims[m_axis]) { | ||||
|       Index left_index; | ||||
|       if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|         left_index = subs[0]; | ||||
|         for (int i = 1; i < NumDims; ++i) { | ||||
|           left_index += (subs[i] % left_dims[i]) * m_leftStrides[i]; | ||||
|         } | ||||
|       } else { | ||||
|         left_index = subs[NumDims - 1]; | ||||
|         for (int i = NumDims - 2; i >= 0; --i) { | ||||
|           left_index += (subs[i] % left_dims[i]) * m_leftStrides[i]; | ||||
|         } | ||||
|       } | ||||
|       return m_leftImpl.coeff(left_index); | ||||
|     } else { | ||||
|       subs[m_axis] -= left_dims[m_axis]; | ||||
|       const Dimensions& right_dims = m_rightImpl.dimensions(); | ||||
|       Index right_index; | ||||
|       if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|         right_index = subs[0]; | ||||
|         for (int i = 1; i < NumDims; ++i) { | ||||
|           right_index += (subs[i] % right_dims[i]) * m_rightStrides[i]; | ||||
|         } | ||||
|       } else { | ||||
|         right_index = subs[NumDims - 1]; | ||||
|         for (int i = NumDims - 2; i >= 0; --i) { | ||||
|           right_index += (subs[i] % right_dims[i]) * m_rightStrides[i]; | ||||
|         } | ||||
|       } | ||||
|       return m_rightImpl.coeff(right_index); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   // TODO(phli): Add a real vectorization. | ||||
|   template<int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const | ||||
|   { | ||||
|     const int packetSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|     EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     eigen_assert(index + packetSize - 1 < dimensions().TotalSize()); | ||||
|  | ||||
|     EIGEN_ALIGN_MAX CoeffReturnType values[packetSize]; | ||||
|     for (int i = 0; i < packetSize; ++i) { | ||||
|       values[i] = coeff(index+i); | ||||
|     } | ||||
|     PacketReturnType rslt = internal::pload<PacketReturnType>(values); | ||||
|     return rslt; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost | ||||
|   costPerCoeff(bool vectorized) const { | ||||
|     const double compute_cost = NumDims * (2 * TensorOpCost::AddCost<Index>() + | ||||
|                                            2 * TensorOpCost::MulCost<Index>() + | ||||
|                                            TensorOpCost::DivCost<Index>() + | ||||
|                                            TensorOpCost::ModCost<Index>()); | ||||
|     const double lhs_size = m_leftImpl.dimensions().TotalSize(); | ||||
|     const double rhs_size = m_rightImpl.dimensions().TotalSize(); | ||||
|     return (lhs_size / (lhs_size + rhs_size)) * | ||||
|                m_leftImpl.costPerCoeff(vectorized) + | ||||
|            (rhs_size / (lhs_size + rhs_size)) * | ||||
|                m_rightImpl.costPerCoeff(vectorized) + | ||||
|            TensorOpCost(0, 0, compute_cost); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } | ||||
|  | ||||
|   protected: | ||||
|     Dimensions m_dimensions; | ||||
|     array<Index, NumDims> m_outputStrides; | ||||
|     array<Index, NumDims> m_leftStrides; | ||||
|     array<Index, NumDims> m_rightStrides; | ||||
|     TensorEvaluator<LeftArgType, Device> m_leftImpl; | ||||
|     TensorEvaluator<RightArgType, Device> m_rightImpl; | ||||
|     const Axis m_axis; | ||||
| }; | ||||
|  | ||||
| // Eval as lvalue | ||||
| template<typename Axis, typename LeftArgType, typename RightArgType, typename Device> | ||||
|   struct TensorEvaluator<TensorConcatenationOp<Axis, LeftArgType, RightArgType>, Device> | ||||
|   : public TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgType>, Device> | ||||
| { | ||||
|   typedef TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgType>, Device> Base; | ||||
|   typedef TensorConcatenationOp<Axis, LeftArgType, RightArgType> XprType; | ||||
|   typedef typename Base::Dimensions Dimensions; | ||||
|   enum { | ||||
|     IsAligned = false, | ||||
|     PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & TensorEvaluator<RightArgType, Device>::PacketAccess, | ||||
|     Layout = TensorEvaluator<LeftArgType, Device>::Layout, | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(XprType& op, const Device& device) | ||||
|     : Base(op, device) | ||||
|   { | ||||
|     EIGEN_STATIC_ASSERT((static_cast<int>(Layout) == static_cast<int>(ColMajor)), YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|   } | ||||
|  | ||||
|   typedef typename XprType::Index Index; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) | ||||
|   { | ||||
|     // Collect dimension-wise indices (subs). | ||||
|     array<Index, Base::NumDims> subs; | ||||
|     for (int i = Base::NumDims - 1; i > 0; --i) { | ||||
|       subs[i] = index / this->m_outputStrides[i]; | ||||
|       index -= subs[i] * this->m_outputStrides[i]; | ||||
|     } | ||||
|     subs[0] = index; | ||||
|  | ||||
|     const Dimensions& left_dims = this->m_leftImpl.dimensions(); | ||||
|     if (subs[this->m_axis] < left_dims[this->m_axis]) { | ||||
|       Index left_index = subs[0]; | ||||
|       for (int i = 1; i < Base::NumDims; ++i) { | ||||
|         left_index += (subs[i] % left_dims[i]) * this->m_leftStrides[i]; | ||||
|       } | ||||
|       return this->m_leftImpl.coeffRef(left_index); | ||||
|     } else { | ||||
|       subs[this->m_axis] -= left_dims[this->m_axis]; | ||||
|       const Dimensions& right_dims = this->m_rightImpl.dimensions(); | ||||
|       Index right_index = subs[0]; | ||||
|       for (int i = 1; i < Base::NumDims; ++i) { | ||||
|         right_index += (subs[i] % right_dims[i]) * this->m_rightStrides[i]; | ||||
|       } | ||||
|       return this->m_rightImpl.coeffRef(right_index); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   void writePacket(Index index, const PacketReturnType& x) | ||||
|   { | ||||
|     const int packetSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|     EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     eigen_assert(index + packetSize - 1 < this->dimensions().TotalSize()); | ||||
|  | ||||
|     EIGEN_ALIGN_MAX CoeffReturnType values[packetSize]; | ||||
|     internal::pstore<CoeffReturnType, PacketReturnType>(values, x); | ||||
|     for (int i = 0; i < packetSize; ++i) { | ||||
|       coeffRef(index+i) = values[i]; | ||||
|     } | ||||
|   } | ||||
| }; | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H | ||||
							
								
								
									
628  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h  vendored  Normal file
							| @@ -0,0 +1,628 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \class TensorContraction | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Tensor contraction class. | ||||
|   * | ||||
|   * | ||||
|   */ | ||||
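| // Usage sketch (illustrative; assumes <unsupported/Eigen/CXX11/Tensor> is included): | ||||
| // a contraction is normally created through TensorBase::contract(), passing the | ||||
| // pairs of dimensions to sum over as an array of IndexPair. | ||||
| // | ||||
| //   Eigen::Tensor<float, 2> a(2, 3), b(3, 4); | ||||
| //   a.setRandom(); b.setRandom(); | ||||
| //   // sum over the second dimension of a and the first of b, i.e. a matrix product: | ||||
| //   Eigen::array<Eigen::IndexPair<int>, 1> dims = { Eigen::IndexPair<int>(1, 0) }; | ||||
| //   Eigen::Tensor<float, 2> c = a.contract(b, dims);  // c has dimensions (2, 4) | ||||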
| namespace internal { | ||||
|  | ||||
| template<typename Dimensions, typename LhsXprType, typename RhsXprType> | ||||
| struct traits<TensorContractionOp<Dimensions, LhsXprType, RhsXprType> > | ||||
| { | ||||
|   // Type promotion to handle the case where the types of the lhs and the rhs are different. | ||||
|   typedef typename gebp_traits<typename remove_const<typename LhsXprType::Scalar>::type, | ||||
|                                typename remove_const<typename RhsXprType::Scalar>::type>::ResScalar Scalar; | ||||
|  | ||||
|   typedef typename promote_storage_type<typename traits<LhsXprType>::StorageKind, | ||||
|                                         typename traits<RhsXprType>::StorageKind>::ret StorageKind; | ||||
|   typedef typename promote_index_type<typename traits<LhsXprType>::Index, | ||||
|                                       typename traits<RhsXprType>::Index>::type Index; | ||||
|   typedef typename LhsXprType::Nested LhsNested; | ||||
|   typedef typename RhsXprType::Nested RhsNested; | ||||
|   typedef typename remove_reference<LhsNested>::type _LhsNested; | ||||
|   typedef typename remove_reference<RhsNested>::type _RhsNested; | ||||
|  | ||||
|   // From NumDims below. | ||||
|   static const int NumDimensions = traits<LhsXprType>::NumDimensions + traits<RhsXprType>::NumDimensions - 2 * array_size<Dimensions>::value; | ||||
|   static const int Layout = traits<LhsXprType>::Layout; | ||||
|  | ||||
|   enum { | ||||
|     Flags = 0 | ||||
|   }; | ||||
| }; | ||||
|  | ||||
| template<typename Dimensions, typename LhsXprType, typename RhsXprType> | ||||
| struct eval<TensorContractionOp<Dimensions, LhsXprType, RhsXprType>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorContractionOp<Dimensions, LhsXprType, RhsXprType>& type; | ||||
| }; | ||||
|  | ||||
| template<typename Dimensions, typename LhsXprType, typename RhsXprType> | ||||
| struct nested<TensorContractionOp<Dimensions, LhsXprType, RhsXprType>, 1, typename eval<TensorContractionOp<Dimensions, LhsXprType, RhsXprType> >::type> | ||||
| { | ||||
|   typedef TensorContractionOp<Dimensions, LhsXprType, RhsXprType> type; | ||||
| }; | ||||
|  | ||||
| template<typename Indices_, typename LeftArgType_, typename RightArgType_, typename Device_> | ||||
| struct traits<TensorEvaluator<const TensorContractionOp<Indices_, LeftArgType_, RightArgType_>, Device_> > { | ||||
|   typedef Indices_ Indices; | ||||
|   typedef LeftArgType_ LeftArgType; | ||||
|   typedef RightArgType_ RightArgType; | ||||
|   typedef Device_ Device; | ||||
|  | ||||
|   // From NumDims below. | ||||
|   static const int NumDimensions = traits<LeftArgType_>::NumDimensions + traits<RightArgType_>::NumDimensions - 2 * array_size<Indices_>::value; | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
| template<typename Indices, typename LhsXprType, typename RhsXprType> | ||||
| class TensorContractionOp : public TensorBase<TensorContractionOp<Indices, LhsXprType, RhsXprType>, ReadOnlyAccessors> | ||||
| { | ||||
|   public: | ||||
|   typedef typename Eigen::internal::traits<TensorContractionOp>::Scalar Scalar; | ||||
|   typedef typename internal::gebp_traits<typename LhsXprType::CoeffReturnType, | ||||
|                                                    typename RhsXprType::CoeffReturnType>::ResScalar CoeffReturnType; | ||||
|   typedef typename Eigen::internal::nested<TensorContractionOp>::type Nested; | ||||
|   typedef typename Eigen::internal::traits<TensorContractionOp>::StorageKind StorageKind; | ||||
|   typedef typename Eigen::internal::traits<TensorContractionOp>::Index Index; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionOp( | ||||
|       const LhsXprType& lhs, const RhsXprType& rhs, const Indices& dims) | ||||
|       : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_indices(dims) {} | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   const Indices& indices() const { return m_indices; } | ||||
|  | ||||
|   /** \returns the nested expressions */ | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   const typename internal::remove_all<typename LhsXprType::Nested>::type& | ||||
|   lhsExpression() const { return m_lhs_xpr; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   const typename internal::remove_all<typename RhsXprType::Nested>::type& | ||||
|   rhsExpression() const { return m_rhs_xpr; } | ||||
|  | ||||
|   protected: | ||||
|     typename LhsXprType::Nested m_lhs_xpr; | ||||
|     typename RhsXprType::Nested m_rhs_xpr; | ||||
|     const Indices m_indices; | ||||
| }; | ||||
|  | ||||
|  | ||||
| template<typename Derived> | ||||
| struct TensorContractionEvaluatorBase | ||||
| { | ||||
|   typedef typename internal::traits<Derived>::Indices Indices; | ||||
|   typedef typename internal::traits<Derived>::LeftArgType LeftArgType; | ||||
|   typedef typename internal::traits<Derived>::RightArgType RightArgType; | ||||
|   typedef typename internal::traits<Derived>::Device Device; | ||||
|  | ||||
|   typedef TensorContractionOp<Indices, LeftArgType, RightArgType> XprType; | ||||
|   typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar; | ||||
|   typedef typename XprType::Index Index; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = true, | ||||
|     PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1), | ||||
|     Layout = TensorEvaluator<LeftArgType, Device>::Layout, | ||||
|     CoordAccess = false,  // to be implemented | ||||
|     RawAccess = true | ||||
|   }; | ||||
|  | ||||
|   // Most of the code is assuming that both input tensors are ColMajor. If the | ||||
|   // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS: | ||||
|   // If we want to compute A * B = C, where A is LHS and B is RHS, the code | ||||
|   // will pretend B is LHS and A is RHS. | ||||
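|   // (Interpreting row-major storage as column-major transposes each operand, and | ||||
|   // since (A*B)^T = B^T * A^T, swapping the operands recovers the row-major result.) | ||||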
|   typedef typename internal::conditional< | ||||
|     static_cast<int>(Layout) == static_cast<int>(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType; | ||||
|   typedef typename internal::conditional< | ||||
|     static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType; | ||||
|  | ||||
|   static const int LDims = | ||||
|       internal::array_size<typename TensorEvaluator<EvalLeftArgType, Device>::Dimensions>::value; | ||||
|   static const int RDims = | ||||
|       internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value; | ||||
|   static const int ContractDims = internal::array_size<Indices>::value; | ||||
|   static const int NumDims = LDims + RDims - 2 * ContractDims; | ||||
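|   // e.g. contracting a rank-3 LHS with a rank-2 RHS over one index pair yields | ||||
|   // a result of rank 3 + 2 - 2*1 = 3. | ||||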
|  | ||||
|   typedef array<Index, ContractDims> contract_t; | ||||
|   typedef array<Index, LDims - ContractDims> left_nocontract_t; | ||||
|   typedef array<Index, RDims - ContractDims> right_nocontract_t; | ||||
|  | ||||
|   typedef DSizes<Index, NumDims> Dimensions; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   TensorContractionEvaluatorBase(const XprType& op, const Device& device) | ||||
|     : m_leftImpl(choose(Cond<static_cast<int>(Layout) == static_cast<int>(ColMajor)>(), | ||||
|                           op.lhsExpression(), op.rhsExpression()), device), | ||||
|     m_rightImpl(choose(Cond<static_cast<int>(Layout) == static_cast<int>(ColMajor)>(), | ||||
|                           op.rhsExpression(), op.lhsExpression()), device), | ||||
|         m_device(device), | ||||
|         m_result(NULL) { | ||||
|     EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) == | ||||
| 			   static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout)), | ||||
|                         YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|  | ||||
|  | ||||
|     DSizes<Index, LDims> eval_left_dims; | ||||
|     DSizes<Index, RDims> eval_right_dims; | ||||
|     array<IndexPair<Index>, ContractDims> eval_op_indices; | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       // For ColMajor, we keep using the existing dimensions | ||||
|       for (int i = 0; i < LDims; i++) { | ||||
|         eval_left_dims[i] = m_leftImpl.dimensions()[i]; | ||||
|       } | ||||
|       for (int i = 0; i < RDims; i++) { | ||||
|         eval_right_dims[i] = m_rightImpl.dimensions()[i]; | ||||
|       } | ||||
|       // We keep the pairs of contracting indices. | ||||
|       for (int i = 0; i < ContractDims; i++) { | ||||
|         eval_op_indices[i].first = op.indices()[i].first; | ||||
|         eval_op_indices[i].second = op.indices()[i].second; | ||||
|       } | ||||
|     } else { | ||||
|       // For RowMajor, we need to reverse the existing dimensions | ||||
|       for (int i = 0; i < LDims; i++) { | ||||
|         eval_left_dims[i] = m_leftImpl.dimensions()[LDims - i - 1]; | ||||
|       } | ||||
|       for (int i = 0; i < RDims; i++) { | ||||
|         eval_right_dims[i] = m_rightImpl.dimensions()[RDims - i - 1]; | ||||
|       } | ||||
|       // We need to flip all the pairs of contracting indices as well as | ||||
|       // reversing the dimensions. | ||||
|       for (int i = 0; i < ContractDims; i++) { | ||||
|         eval_op_indices[i].first = LDims - 1 - op.indices()[ContractDims - 1 - i].second; | ||||
|         eval_op_indices[i].second = RDims - 1 - op.indices()[ContractDims - 1 - i].first; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     // Check for duplicate axes and make sure the first index in eval_op_indices | ||||
|     // is increasing. Using O(n^2) sorting is OK since ContractDims is small | ||||
|     for (int i = 0; i < ContractDims; i++) { | ||||
|       for (int j = i + 1; j < ContractDims; j++) { | ||||
|         eigen_assert(eval_op_indices[j].first != eval_op_indices[i].first && | ||||
|                      eval_op_indices[j].second != eval_op_indices[i].second && | ||||
|                      "contraction axes should be unique"); | ||||
|         if (eval_op_indices[j].first < eval_op_indices[i].first) { | ||||
|           numext::swap(eval_op_indices[j], eval_op_indices[i]); | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     array<Index, LDims> lhs_strides; | ||||
|     lhs_strides[0] = 1; | ||||
|     for (int i = 0; i < LDims-1; ++i) { | ||||
|       lhs_strides[i+1] = lhs_strides[i] * eval_left_dims[i]; | ||||
|     } | ||||
|  | ||||
|     array<Index, RDims> rhs_strides; | ||||
|     rhs_strides[0] = 1; | ||||
|     for (int i = 0; i < RDims-1; ++i) { | ||||
|       rhs_strides[i+1] = rhs_strides[i] * eval_right_dims[i]; | ||||
|     } | ||||
|  | ||||
|     if (m_i_strides.size() > 0) m_i_strides[0] = 1; | ||||
|     if (m_j_strides.size() > 0) m_j_strides[0] = 1; | ||||
|     if (m_k_strides.size() > 0) m_k_strides[0] = 1; | ||||
|  | ||||
|     m_i_size = 1; | ||||
|     m_j_size = 1; | ||||
|     m_k_size = 1; | ||||
|  | ||||
|     // To compute the dimension, we simply concatenate the non-contracting | ||||
|     // dimensions of the left and then the right tensor. Additionally, we also | ||||
|     // compute the strides corresponding to the left non-contracting | ||||
|     // dimensions and right non-contracting dimensions. | ||||
|     m_lhs_inner_dim_contiguous = true; | ||||
|     int dim_idx = 0; | ||||
|     unsigned int nocontract_idx = 0; | ||||
|  | ||||
|     for (int i = 0; i < LDims; i++) { | ||||
|       // find if we are contracting on index i of left tensor | ||||
|       bool contracting = false; | ||||
|       for (int j = 0; j < ContractDims; j++) { | ||||
|         if (eval_op_indices[j].first == i) { | ||||
|           contracting = true; | ||||
|           break; | ||||
|         } | ||||
|       } | ||||
|       if (!contracting) { | ||||
|         // add dimension size to output dimensions | ||||
|         m_dimensions[dim_idx] = eval_left_dims[i]; | ||||
|         m_left_nocontract_strides[nocontract_idx] = lhs_strides[i]; | ||||
|         if (dim_idx != i) { | ||||
|           m_lhs_inner_dim_contiguous = false; | ||||
|         } | ||||
|         if (nocontract_idx+1 < internal::array_size<left_nocontract_t>::value) { | ||||
|           m_i_strides[nocontract_idx+1] = | ||||
|               m_i_strides[nocontract_idx] * eval_left_dims[i]; | ||||
|         } else { | ||||
|           m_i_size = m_i_strides[nocontract_idx] * eval_left_dims[i]; | ||||
|         } | ||||
|         dim_idx++; | ||||
|         nocontract_idx++; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     nocontract_idx = 0; | ||||
|     for (int i = 0; i < RDims; i++) { | ||||
|       bool contracting = false; | ||||
|       // find if we are contracting on index i of right tensor | ||||
|       for (int j = 0; j < ContractDims; j++) { | ||||
|         if (eval_op_indices[j].second == i) { | ||||
|           contracting = true; | ||||
|           break; | ||||
|         } | ||||
|       } | ||||
|       if (!contracting) { | ||||
|         m_dimensions[dim_idx] = eval_right_dims[i]; | ||||
|         if (nocontract_idx+1 < internal::array_size<right_nocontract_t>::value) { | ||||
|           m_j_strides[nocontract_idx+1] = | ||||
|               m_j_strides[nocontract_idx] * eval_right_dims[i]; | ||||
|         } else { | ||||
|           m_j_size = m_j_strides[nocontract_idx] * eval_right_dims[i]; | ||||
|         } | ||||
|         m_right_nocontract_strides[nocontract_idx] = rhs_strides[i]; | ||||
|         dim_idx++; | ||||
|         nocontract_idx++; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     // Now compute the strides corresponding to the contracting dimensions. We | ||||
|     // assumed above that non-contracting axes are represented in the same order | ||||
|     // in the matrix as they are in the tensor. This is not the case for | ||||
|     // contracting axes. As the contracting axes must be of the same size in | ||||
|     // each tensor, we'll only look at the first tensor here. | ||||
|     m_rhs_inner_dim_contiguous = true; | ||||
|     m_rhs_inner_dim_reordered = false; | ||||
|     for (int i = 0; i < ContractDims; i++) { | ||||
|       Index left = eval_op_indices[i].first; | ||||
|       Index right = eval_op_indices[i].second; | ||||
|  | ||||
|       Index size = eval_left_dims[left]; | ||||
|       eigen_assert(size == eval_right_dims[right] && | ||||
|                    "Contraction axes must be same size"); | ||||
|  | ||||
|       if (i+1 < static_cast<int>(internal::array_size<contract_t>::value)) { | ||||
|         m_k_strides[i+1] = m_k_strides[i] * size; | ||||
|       } else { | ||||
|         m_k_size = m_k_strides[i] * size; | ||||
|       } | ||||
|       m_left_contracting_strides[i] = lhs_strides[left]; | ||||
|       m_right_contracting_strides[i] = rhs_strides[right]; | ||||
|  | ||||
|       if (i > 0 && right < eval_op_indices[i-1].second) { | ||||
|         m_rhs_inner_dim_reordered = true; | ||||
|       } | ||||
|       if (right != i) { | ||||
|         m_rhs_inner_dim_contiguous = false; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     // If the layout is RowMajor, we need to reverse the m_dimensions | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(RowMajor)) { | ||||
|       for (int i = 0, j = NumDims - 1; i < j; i++, j--) { | ||||
|         numext::swap(m_dimensions[i], m_dimensions[j]); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) { | ||||
|     m_leftImpl.evalSubExprsIfNeeded(NULL); | ||||
|     m_rightImpl.evalSubExprsIfNeeded(NULL); | ||||
|     if (data) { | ||||
|       evalTo(data); | ||||
|       return false; | ||||
|     } else { | ||||
|       m_result = static_cast<Scalar *>(m_device.allocate(dimensions().TotalSize() * sizeof(Scalar))); | ||||
|       evalTo(m_result); | ||||
|       return true; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC void evalTo(Scalar* buffer) const { | ||||
|     if (this->m_lhs_inner_dim_contiguous) { | ||||
|       if (this->m_rhs_inner_dim_contiguous) { | ||||
|         if (this->m_rhs_inner_dim_reordered) { | ||||
|           static_cast<const Derived*>(this)->template evalProduct<true, true, true, Unaligned>(buffer); | ||||
|         } | ||||
|         else { | ||||
|           static_cast<const Derived*>(this)->template evalProduct<true, true, false, Unaligned>(buffer); | ||||
|         } | ||||
|       } | ||||
|       else { | ||||
|        if (this->m_rhs_inner_dim_reordered) { | ||||
|           static_cast<const Derived*>(this)->template evalProduct<true, false, true, Unaligned>(buffer); | ||||
|         } | ||||
|         else { | ||||
|           static_cast<const Derived*>(this)->template evalProduct<true, false, false, Unaligned>(buffer); | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|     else { | ||||
|       if (this->m_rhs_inner_dim_contiguous) { | ||||
|         if (this->m_rhs_inner_dim_reordered) { | ||||
|           static_cast<const Derived*>(this)->template evalProduct<false, true, true, Unaligned>(buffer); | ||||
|         } | ||||
|         else { | ||||
|           static_cast<const Derived*>(this)->template evalProduct<false, true, false, Unaligned>(buffer); | ||||
|         } | ||||
|       } | ||||
|       else { | ||||
|        if (this->m_rhs_inner_dim_reordered) { | ||||
|           static_cast<const Derived*>(this)->template evalProduct<false, false, true, Unaligned>(buffer); | ||||
|         } | ||||
|         else { | ||||
|           static_cast<const Derived*>(this)->template evalProduct<false, false, false, Unaligned>(buffer); | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment> | ||||
|   EIGEN_DEVICE_FUNC void evalGemv(Scalar* buffer) const { | ||||
|     const Index rows = m_i_size; | ||||
|     const Index cols = m_k_size; | ||||
|  | ||||
|     typedef typename internal::remove_const<typename EvalLeftArgType::Scalar>::type LhsScalar; | ||||
|     typedef typename internal::remove_const<typename EvalRightArgType::Scalar>::type RhsScalar; | ||||
|     typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator; | ||||
|     typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator; | ||||
|     const Index lhs_packet_size = internal::unpacket_traits<typename LeftEvaluator::PacketReturnType>::size; | ||||
|     const Index rhs_packet_size = internal::unpacket_traits<typename RightEvaluator::PacketReturnType>::size; | ||||
|     const int lhs_alignment = LeftEvaluator::IsAligned ? Aligned : Unaligned; | ||||
|     const int rhs_alignment = RightEvaluator::IsAligned ? Aligned : Unaligned; | ||||
|     typedef internal::TensorContractionInputMapper<LhsScalar, Index, internal::Lhs, | ||||
|                                                    LeftEvaluator, left_nocontract_t, | ||||
|                                                    contract_t, lhs_packet_size, | ||||
|                                                    lhs_inner_dim_contiguous, | ||||
|                                                    false, lhs_alignment> LhsMapper; | ||||
|  | ||||
|     typedef internal::TensorContractionInputMapper<RhsScalar, Index, internal::Rhs, | ||||
|                                                    RightEvaluator, right_nocontract_t, | ||||
|                                                    contract_t, rhs_packet_size, | ||||
|                                                    rhs_inner_dim_contiguous, | ||||
|                                                    rhs_inner_dim_reordered, rhs_alignment> RhsMapper; | ||||
|  | ||||
|     LhsMapper lhs(m_leftImpl, m_left_nocontract_strides, m_i_strides, | ||||
|                   m_left_contracting_strides, m_k_strides); | ||||
|     RhsMapper rhs(m_rightImpl, m_right_nocontract_strides, m_j_strides, | ||||
|                   m_right_contracting_strides, m_k_strides); | ||||
|  | ||||
|     const Scalar alpha(1); | ||||
|     const Index resIncr(1); | ||||
|  | ||||
|     // zero out the result buffer (which must be of size at least rows * sizeof(Scalar)) | ||||
|     m_device.memset(buffer, 0, rows * sizeof(Scalar)); | ||||
|  | ||||
|     internal::general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,false,RhsScalar,RhsMapper,false>::run( | ||||
|         rows, cols, lhs, rhs, | ||||
|         buffer, resIncr, alpha); | ||||
|   } | ||||
|  | ||||
|   template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment> | ||||
|   EIGEN_DEVICE_FUNC void evalGemm(Scalar* buffer) const { | ||||
|     // columns in left side, rows in right side | ||||
|     const Index k = this->m_k_size; | ||||
|  | ||||
|     // rows in left side | ||||
|     const Index m = this->m_i_size; | ||||
|  | ||||
|     // columns in right side | ||||
|     const Index n = this->m_j_size; | ||||
|  | ||||
|     // zero out the result buffer (which must be of size at least m * n * sizeof(Scalar)) | ||||
|     this->m_device.memset(buffer, 0, m * n * sizeof(Scalar)); | ||||
|  | ||||
|     // define mr, nr, and all of my data mapper types | ||||
|     typedef typename internal::remove_const<typename EvalLeftArgType::Scalar>::type LhsScalar; | ||||
|     typedef typename internal::remove_const<typename EvalRightArgType::Scalar>::type RhsScalar; | ||||
|     typedef typename internal::gebp_traits<LhsScalar, RhsScalar> Traits; | ||||
|  | ||||
|     const Index nr = Traits::nr; | ||||
|     const Index mr = Traits::mr; | ||||
|  | ||||
|     typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator; | ||||
|     typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator; | ||||
|  | ||||
|     const Index lhs_packet_size = internal::unpacket_traits<typename LeftEvaluator::PacketReturnType>::size; | ||||
|     const Index rhs_packet_size = internal::unpacket_traits<typename RightEvaluator::PacketReturnType>::size; | ||||
|  | ||||
|     typedef internal::TensorContractionInputMapper<LhsScalar, Index, internal::Lhs, | ||||
|                                                    LeftEvaluator, left_nocontract_t, | ||||
|                                                    contract_t, lhs_packet_size, | ||||
|                                                    lhs_inner_dim_contiguous, | ||||
|                                                    false, Unaligned> LhsMapper; | ||||
|  | ||||
|     typedef internal::TensorContractionInputMapper<RhsScalar, Index, internal::Rhs, | ||||
|                                                    RightEvaluator, right_nocontract_t, | ||||
|                                                    contract_t, rhs_packet_size, | ||||
|                                                    rhs_inner_dim_contiguous, | ||||
|                                                    rhs_inner_dim_reordered, Unaligned> RhsMapper; | ||||
|  | ||||
|     typedef internal::blas_data_mapper<Scalar, Index, ColMajor> OutputMapper; | ||||
|  | ||||
|     // Declare GEBP packing and kernel structs | ||||
|     internal::gemm_pack_lhs<LhsScalar, Index, typename LhsMapper::SubMapper, mr, Traits::LhsProgress, ColMajor> pack_lhs; | ||||
|     internal::gemm_pack_rhs<RhsScalar, Index, typename RhsMapper::SubMapper, nr, ColMajor> pack_rhs; | ||||
|  | ||||
|     internal::gebp_kernel<LhsScalar, RhsScalar, Index, OutputMapper, mr, nr, false, false> gebp; | ||||
|  | ||||
|     // initialize data mappers | ||||
|     LhsMapper lhs(this->m_leftImpl, this->m_left_nocontract_strides, this->m_i_strides, | ||||
|                   this->m_left_contracting_strides, this->m_k_strides); | ||||
|  | ||||
|     RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides, this->m_j_strides, | ||||
|                   this->m_right_contracting_strides, this->m_k_strides); | ||||
|  | ||||
|     OutputMapper output(buffer, m); | ||||
|  | ||||
|     // Sizes of the blocks to load in cache. See the Goto paper for details. | ||||
|     internal::TensorContractionBlocking<LhsMapper, RhsMapper, Index, internal::ShardByCol> blocking(k, m, n, 1); | ||||
|     const Index kc = blocking.kc(); | ||||
|     const Index mc = numext::mini(m, blocking.mc()); | ||||
|     const Index nc = numext::mini(n, blocking.nc()); | ||||
|     const Index sizeA = mc * kc; | ||||
|     const Index sizeB = kc * nc; | ||||
|  | ||||
|     LhsScalar* blockA = static_cast<LhsScalar *>(this->m_device.allocate(sizeA * sizeof(LhsScalar))); | ||||
|     RhsScalar* blockB = static_cast<RhsScalar *>(this->m_device.allocate(sizeB * sizeof(RhsScalar))); | ||||
|  | ||||
|     for(Index i2=0; i2<m; i2+=mc) | ||||
|     { | ||||
|       const Index actual_mc = numext::mini(i2+mc,m)-i2; | ||||
|       for (Index k2 = 0; k2 < k; k2 += kc) { | ||||
|         // make sure we don't overshoot right edge of left matrix, then pack vertical panel | ||||
|         const Index actual_kc = numext::mini(k2 + kc, k) - k2; | ||||
|         pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc, 0, 0); | ||||
|  | ||||
|         // series of horizontal blocks | ||||
|         for (Index j2 = 0; j2 < n; j2 += nc) { | ||||
|           // make sure we don't overshoot right edge of right matrix, then pack block | ||||
|           const Index actual_nc = numext::mini(j2 + nc, n) - j2; | ||||
|           pack_rhs(blockB, rhs.getSubMapper(k2, j2), actual_kc, actual_nc, 0, 0); | ||||
|  | ||||
|           // call gebp (matrix kernel) | ||||
|           // The parameters here are copied from Eigen's GEMM implementation | ||||
|           gebp(output.getSubMapper(i2, j2), blockA, blockB, actual_mc, actual_kc, actual_nc, Scalar(1), -1, -1, 0, 0); | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     this->m_device.deallocate(blockA); | ||||
|     this->m_device.deallocate(blockB); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { | ||||
|     m_leftImpl.cleanup(); | ||||
|     m_rightImpl.cleanup(); | ||||
|  | ||||
|     if (m_result != NULL) { | ||||
|       m_device.deallocate(m_result); | ||||
|       m_result = NULL; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { | ||||
|     return m_result[index]; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool) const { | ||||
|     return TensorOpCost(sizeof(CoeffReturnType), 0, 0); | ||||
|   } | ||||
|  | ||||
|   template<int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { | ||||
|     return internal::ploadt<PacketReturnType, LoadMode>(m_result + index); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const { return m_result; } | ||||
|  | ||||
|   protected: | ||||
|   // Prevent assignment | ||||
|   TensorContractionEvaluatorBase& operator = (const TensorContractionEvaluatorBase&); | ||||
|   Dimensions m_dimensions; | ||||
|  | ||||
|   contract_t m_k_strides; | ||||
|   contract_t m_left_contracting_strides; | ||||
|   contract_t m_right_contracting_strides; | ||||
|  | ||||
|   bool m_lhs_inner_dim_contiguous; | ||||
|   bool m_rhs_inner_dim_contiguous; | ||||
|   bool m_rhs_inner_dim_reordered; | ||||
|  | ||||
|   left_nocontract_t m_i_strides; | ||||
|   right_nocontract_t m_j_strides; | ||||
|   left_nocontract_t m_left_nocontract_strides; | ||||
|   right_nocontract_t m_right_nocontract_strides; | ||||
|  | ||||
|   Index m_i_size; | ||||
|   Index m_j_size; | ||||
|   Index m_k_size; | ||||
|  | ||||
|   TensorEvaluator<EvalLeftArgType, Device> m_leftImpl; | ||||
|   TensorEvaluator<EvalRightArgType, Device> m_rightImpl; | ||||
|   const Device& m_device; | ||||
|   Scalar* m_result; | ||||
| }; | ||||
|  | ||||
|  | ||||
| // evaluator for default device | ||||
| template<typename Indices, typename LeftArgType, typename RightArgType, typename Device> | ||||
| struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, Device> : | ||||
|     public TensorContractionEvaluatorBase< | ||||
|       TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, Device> > { | ||||
|   typedef TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, Device> Self; | ||||
|   typedef TensorContractionEvaluatorBase<Self> Base; | ||||
|  | ||||
|   typedef TensorContractionOp<Indices, LeftArgType, RightArgType> XprType; | ||||
|   typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar; | ||||
|   typedef typename XprType::Index Index; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|  | ||||
|   enum { | ||||
|     Layout = TensorEvaluator<LeftArgType, Device>::Layout | ||||
|   }; | ||||
|  | ||||
|   // Most of the code is assuming that both input tensors are ColMajor. If the | ||||
|   // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS: | ||||
|   // If we want to compute A * B = C, where A is LHS and B is RHS, the code | ||||
|   // will pretend B is LHS and A is RHS. | ||||
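|   // For the plain matrix case this is the usual identity (A * B)^T = B^T * A^T: a | ||||
|   // row-major operand reinterpreted as column-major data is its transpose, so swapping | ||||
|   // the operands yields the correct coefficients in the original row-major layout. | ||||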
|   typedef typename internal::conditional< | ||||
|     static_cast<int>(Layout) == static_cast<int>(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType; | ||||
|   typedef typename internal::conditional< | ||||
|     static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType; | ||||
|  | ||||
|   static const int LDims = | ||||
|       internal::array_size<typename TensorEvaluator<EvalLeftArgType, Device>::Dimensions>::value; | ||||
|   static const int RDims = | ||||
|       internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value; | ||||
|   static const int ContractDims = internal::array_size<Indices>::value; | ||||
|  | ||||
|   typedef array<Index, ContractDims> contract_t; | ||||
|   typedef array<Index, LDims - ContractDims> left_nocontract_t; | ||||
|   typedef array<Index, RDims - ContractDims> right_nocontract_t; | ||||
|  | ||||
|   static const int NumDims = LDims + RDims - 2 * ContractDims; | ||||
|  | ||||
|   // Could we use NumDimensions here? | ||||
|   typedef DSizes<Index, NumDims> Dimensions; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) : | ||||
|       Base(op, device) { } | ||||
|  | ||||
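|   // A contraction whose output has a single column (m_j_size == 1) reduces to a | ||||
|   // matrix-vector product (evalGemv); everything else goes through the blocked GEMM | ||||
|   // path implemented in the base class above (evalGemm). | ||||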
|   template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment> | ||||
|   EIGEN_DEVICE_FUNC void evalProduct(Scalar* buffer) const { | ||||
|     if (this->m_j_size == 1) { | ||||
|       this->template evalGemv<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Alignment>(buffer); | ||||
|       return; | ||||
|     } | ||||
|  | ||||
|     this->template evalGemm<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Alignment>(buffer); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_H | ||||
							
								
								
									
56  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h  vendored  Normal file
							| @@ -0,0 +1,56 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_BLOCKING_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_BLOCKING_H | ||||
|  | ||||
|  | ||||
| namespace Eigen { | ||||
| namespace internal { | ||||
|  | ||||
| enum { | ||||
|   ShardByRow = 0, | ||||
|   ShardByCol = 1 | ||||
| }; | ||||
|  | ||||
|  | ||||
| // Default Blocking Strategy | ||||
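| // TensorContractionBlocking wraps computeProductBlockingSizes to pick cache-friendly | ||||
| // (kc, mc, nc) block sizes for an (m x k) * (k x n) product; when sharding by row the | ||||
| // m and n arguments are simply swapped. | ||||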
| template <typename LhsMapper, typename RhsMapper, typename Index, int ShardingType=ShardByCol> | ||||
| class TensorContractionBlocking { | ||||
|  public: | ||||
|  | ||||
|   typedef typename LhsMapper::Scalar LhsScalar; | ||||
|   typedef typename RhsMapper::Scalar RhsScalar; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC TensorContractionBlocking(Index k, Index m, Index n, Index num_threads = 1) : | ||||
|       kc_(k), mc_(m), nc_(n) | ||||
|   { | ||||
|     if (ShardingType == ShardByCol) { | ||||
|       computeProductBlockingSizes<LhsScalar, RhsScalar, 1>(kc_, mc_, nc_, num_threads); | ||||
|     } | ||||
|     else { | ||||
|       computeProductBlockingSizes<LhsScalar, RhsScalar, 1>(kc_, nc_, mc_, num_threads); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index kc() const { return kc_; } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index mc() const { return mc_; } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index nc() const { return nc_; } | ||||
|  | ||||
|  private: | ||||
|   Index kc_; | ||||
|   Index mc_; | ||||
|   Index nc_; | ||||
| }; | ||||
|  | ||||
|  | ||||
| } // end namespace internal | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_BLOCKING_H | ||||
							
								
								
									
1391  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h  vendored  Normal file  (file diff suppressed because it is too large)
467  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h  vendored  Normal file
							| @@ -0,0 +1,467 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPER_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPER_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| namespace internal { | ||||
|  | ||||
| enum { | ||||
|   Rhs = 0, | ||||
|   Lhs = 1 | ||||
| }; | ||||
|  | ||||
| /* | ||||
|  * Implementation of the Eigen blas_data_mapper class for tensors. | ||||
|  */ | ||||
|  | ||||
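| // CoeffLoader abstracts how coefficients are fetched: the generic version below goes | ||||
| // through the expression's coeff()/packet() interface, while the specialization for | ||||
| // tensors with raw data access reads straight from the underlying buffer and supports | ||||
| // baking a constant offset into the data pointer (DirectOffsets). | ||||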
| template <typename Tensor, bool HasRawAccess> struct CoeffLoader { | ||||
|   enum { | ||||
|     DirectOffsets = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffLoader(const Tensor& tensor) : m_tensor(tensor) { } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void offsetBuffer(typename Tensor::Index) { | ||||
|     eigen_assert(false && "unsupported"); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename Tensor::Scalar coeff(typename Tensor::Index index) const { return m_tensor.coeff(index); } | ||||
|  | ||||
|  template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|  typename Tensor::PacketReturnType packet(typename Tensor::Index index) const | ||||
|   { | ||||
|     return m_tensor.template packet<LoadMode>(index); | ||||
|   } | ||||
|  | ||||
|  | ||||
|  private: | ||||
|   const Tensor m_tensor; | ||||
| }; | ||||
|  | ||||
| template <typename Tensor> struct CoeffLoader<Tensor, true> { | ||||
|   enum { | ||||
|     DirectOffsets = true | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffLoader(const Tensor& tensor) : m_data(tensor.data()) {} | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void offsetBuffer(typename Tensor::Index offset) { | ||||
|     m_data += offset; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename Tensor::Scalar coeff(typename Tensor::Index index) const { return loadConstant(m_data+index); } | ||||
|  | ||||
|  template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|  typename Tensor::PacketReturnType packet(typename Tensor::Index index) const | ||||
|   { | ||||
|     return internal::ploadt_ro<typename Tensor::PacketReturnType, LoadMode>(m_data + index); | ||||
|   } | ||||
|  private: | ||||
|   typedef typename Tensor::Scalar Scalar; | ||||
|   const Scalar* m_data; | ||||
| }; | ||||
|  | ||||
| template<typename Scalar, typename Index, int side, | ||||
|          typename Tensor, | ||||
|          typename nocontract_t, typename contract_t, | ||||
|          int packet_size, bool inner_dim_contiguous, int Alignment> | ||||
| class SimpleTensorContractionMapper { | ||||
|   public: | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   SimpleTensorContractionMapper(const Tensor& tensor, | ||||
|                                 const nocontract_t& nocontract_strides, | ||||
|                                 const nocontract_t& ij_strides, | ||||
|                                 const contract_t& contract_strides, | ||||
|                                 const contract_t& k_strides) : | ||||
|       m_tensor(tensor), | ||||
|       m_nocontract_strides(nocontract_strides), | ||||
|       m_ij_strides(ij_strides), | ||||
|       m_contract_strides(contract_strides), | ||||
|       m_k_strides(k_strides) { } | ||||
|  | ||||
|   enum { | ||||
|     DirectOffsets = CoeffLoader<Tensor, Tensor::RawAccess>::DirectOffsets | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void offsetBuffer(typename Tensor::Index offset) { | ||||
|     m_tensor.offsetBuffer(offset); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE void prefetch(Index /*i*/) { } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE Scalar operator()(Index row) const { | ||||
|     // column major assumption | ||||
|     return operator()(row, 0); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE Scalar operator()(Index row, Index col) const { | ||||
|     return m_tensor.coeff(computeIndex(row, col)); | ||||
|   } | ||||
|  | ||||
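|   // Maps the (row, col) coordinates of the logical 2-D matrix seen by the GEMM kernels | ||||
|   // back to a linear index into the underlying tensor: for the Lhs, row spans the | ||||
|   // non-contracting dimensions and col the contracting ones (and vice versa for the Rhs). | ||||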
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE Index computeIndex(Index row, Index col) const { | ||||
|     const bool left = (side == Lhs); | ||||
|     Index nocontract_val = left ? row : col; | ||||
|     Index linidx = 0; | ||||
|     for (int i = static_cast<int>(array_size<nocontract_t>::value) - 1; i > 0; i--) { | ||||
|       const Index idx = nocontract_val / m_ij_strides[i]; | ||||
|       linidx += idx * m_nocontract_strides[i]; | ||||
|       nocontract_val -= idx * m_ij_strides[i]; | ||||
|     } | ||||
|     if (array_size<typename Tensor::Dimensions>::value > array_size<contract_t>::value) { | ||||
|       if (side == Lhs && inner_dim_contiguous) { | ||||
|         eigen_assert(m_nocontract_strides[0] == 1); | ||||
|         linidx += nocontract_val; | ||||
|       } else { | ||||
|         linidx += nocontract_val * m_nocontract_strides[0]; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     Index contract_val = left ? col : row; | ||||
|     if (array_size<contract_t>::value > 0) { | ||||
|       for (int i = static_cast<int>(array_size<contract_t>::value) - 1; i > 0; i--) { | ||||
|         const Index idx = contract_val / m_k_strides[i]; | ||||
|         linidx += idx * m_contract_strides[i]; | ||||
|         contract_val -= idx * m_k_strides[i]; | ||||
|       } | ||||
|  | ||||
|       if (side == Rhs && inner_dim_contiguous) { | ||||
|         eigen_assert(m_contract_strides[0] == 1); | ||||
|         linidx += contract_val; | ||||
|       } else { | ||||
|         linidx += contract_val * m_contract_strides[0]; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     return linidx; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE IndexPair<Index> computeIndexPair(Index row, Index col, const Index distance) const { | ||||
|     const bool left = (side == Lhs); | ||||
|     Index nocontract_val[2] = {left ? row : col, left ? row + distance : col}; | ||||
|     Index linidx[2] = {0, 0}; | ||||
|     if (array_size<typename Tensor::Dimensions>::value > array_size<contract_t>::value) { | ||||
|       for (int i = static_cast<int>(array_size<nocontract_t>::value) - 1; i > 0; i--) { | ||||
|         const Index idx0 = nocontract_val[0] / m_ij_strides[i]; | ||||
|         const Index idx1 = nocontract_val[1] / m_ij_strides[i]; | ||||
|         linidx[0] += idx0 * m_nocontract_strides[i]; | ||||
|         linidx[1] += idx1 * m_nocontract_strides[i]; | ||||
|         nocontract_val[0] -= idx0 * m_ij_strides[i]; | ||||
|         nocontract_val[1] -= idx1 * m_ij_strides[i]; | ||||
|       } | ||||
|       if (side == Lhs && inner_dim_contiguous) { | ||||
|         eigen_assert(m_nocontract_strides[0] == 1); | ||||
|         linidx[0] += nocontract_val[0]; | ||||
|         linidx[1] += nocontract_val[1]; | ||||
|       } else { | ||||
|         linidx[0] += nocontract_val[0] * m_nocontract_strides[0]; | ||||
|         linidx[1] += nocontract_val[1] * m_nocontract_strides[0]; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     Index contract_val[2] = {left ? col : row, left ? col : row + distance}; | ||||
|     if (array_size<contract_t>::value > 0) { | ||||
|       for (int i = static_cast<int>(array_size<contract_t>::value) - 1; i > 0; i--) { | ||||
|         const Index idx0 = contract_val[0] / m_k_strides[i]; | ||||
|         const Index idx1 = contract_val[1] / m_k_strides[i]; | ||||
|         linidx[0] += idx0 * m_contract_strides[i]; | ||||
|         linidx[1] += idx1 * m_contract_strides[i]; | ||||
|         contract_val[0] -= idx0 * m_k_strides[i]; | ||||
|         contract_val[1] -= idx1 * m_k_strides[i]; | ||||
|       } | ||||
|  | ||||
|       if (side == Rhs && inner_dim_contiguous) { | ||||
|         eigen_assert(m_contract_strides[0] == 1); | ||||
|         linidx[0] += contract_val[0]; | ||||
|         linidx[1] += contract_val[1]; | ||||
|       } else { | ||||
|         linidx[0] += contract_val[0] * m_contract_strides[0]; | ||||
|         linidx[1] += contract_val[1] * m_contract_strides[0]; | ||||
|       } | ||||
|     } | ||||
|     return IndexPair<Index>(linidx[0], linidx[1]); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index firstAligned(Index size) const { | ||||
|     // Only claim alignment when we can compute the actual stride (i.e. when we're | ||||
|     // dealing with the lhs and the inner dimension is contiguous). This is because the | ||||
|     // matrix-vector product relies on the stride when dealing with aligned inputs. | ||||
|     return (Alignment == Aligned) && (side == Lhs) && inner_dim_contiguous ? 0 : size; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index stride() const { | ||||
|     return ((side == Lhs) && inner_dim_contiguous && array_size<contract_t>::value > 0) ? m_contract_strides[0] : 1; | ||||
|   } | ||||
|  | ||||
|  protected: | ||||
|   CoeffLoader<Tensor, Tensor::RawAccess> m_tensor; | ||||
|   const nocontract_t m_nocontract_strides; | ||||
|   const nocontract_t m_ij_strides; | ||||
|   const contract_t m_contract_strides; | ||||
|   const contract_t m_k_strides; | ||||
| }; | ||||
|  | ||||
|  | ||||
| template<typename Scalar, typename Index, int side, | ||||
|          typename Tensor, | ||||
|          typename nocontract_t, typename contract_t, | ||||
|          int packet_size, bool inner_dim_contiguous, | ||||
|          bool inner_dim_reordered, int Alignment> | ||||
| class BaseTensorContractionMapper : public SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, Alignment> | ||||
| { | ||||
|  public: | ||||
|   typedef SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, Alignment> ParentMapper; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   BaseTensorContractionMapper(const Tensor& tensor, | ||||
|                               const nocontract_t& nocontract_strides, | ||||
|                               const nocontract_t& ij_strides, | ||||
|                               const contract_t& contract_strides, | ||||
|                               const contract_t& k_strides) : | ||||
|   ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } | ||||
|  | ||||
|   typedef typename Tensor::PacketReturnType Packet; | ||||
|   typedef typename unpacket_traits<Packet>::half HalfPacket; | ||||
|  | ||||
|   template <int AlignmentType> | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const { | ||||
|     // whole method makes column major assumption | ||||
|  | ||||
|     // don't need to add offsets for now (because operator handles that) | ||||
|     // current code assumes packet size must be a multiple of 2 | ||||
|     EIGEN_STATIC_ASSERT(packet_size % 2 == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|  | ||||
|     if (Tensor::PacketAccess && inner_dim_contiguous && !inner_dim_reordered) { | ||||
|       const Index index = this->computeIndex(i, j); | ||||
|       eigen_assert(this->computeIndex(i+packet_size-1, j) == index + packet_size-1); | ||||
|       return this->m_tensor.template packet<AlignmentType>(index); | ||||
|     } | ||||
|  | ||||
|     const IndexPair<Index> indexPair = this->computeIndexPair(i, j, packet_size - 1); | ||||
|     const Index first = indexPair.first; | ||||
|     const Index last = indexPair.second; | ||||
|  | ||||
|     // We can always do optimized packet reads from left hand side right now, because | ||||
|     // the vertical matrix dimension on the left hand side is never contracting. | ||||
|     // On the right hand side we need to check if the contracting dimensions may have | ||||
|     // been shuffled first. | ||||
|     if (Tensor::PacketAccess && | ||||
|         (side == Lhs || internal::array_size<contract_t>::value <= 1 || !inner_dim_reordered) && | ||||
|         (last - first) == (packet_size - 1)) { | ||||
|  | ||||
|       return this->m_tensor.template packet<AlignmentType>(first); | ||||
|     } | ||||
|  | ||||
|     EIGEN_ALIGN_MAX Scalar data[packet_size]; | ||||
|  | ||||
|     data[0] = this->m_tensor.coeff(first); | ||||
|     for (Index k = 1; k < packet_size - 1; k += 2) { | ||||
|       const IndexPair<Index> internal_pair = this->computeIndexPair(i + k, j, 1); | ||||
|       data[k] = this->m_tensor.coeff(internal_pair.first); | ||||
|       data[k + 1] = this->m_tensor.coeff(internal_pair.second); | ||||
|     } | ||||
|     data[packet_size - 1] = this->m_tensor.coeff(last); | ||||
|  | ||||
|     return pload<Packet>(data); | ||||
|   } | ||||
|  | ||||
|   template <int AlignmentType> | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE HalfPacket loadHalfPacket(Index i, Index j) const { | ||||
|     // whole method makes column major assumption | ||||
|  | ||||
|     // don't need to add offsets for now (because operator handles that) | ||||
|     const Index half_packet_size = unpacket_traits<HalfPacket>::size; | ||||
|     if (half_packet_size == packet_size) { | ||||
|       return loadPacket<AlignmentType>(i, j); | ||||
|     } | ||||
|     EIGEN_ALIGN_MAX Scalar data[half_packet_size]; | ||||
|     for (Index k = 0; k < half_packet_size; k++) { | ||||
|       data[k] = operator()(i + k, j); | ||||
|     } | ||||
|     return pload<HalfPacket>(data); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| template<typename Scalar, typename Index, int side, | ||||
|          typename Tensor, | ||||
|          typename nocontract_t, typename contract_t, | ||||
|          bool inner_dim_contiguous, | ||||
|          bool inner_dim_reordered, int Alignment> | ||||
| class BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous, inner_dim_reordered, Alignment> : public SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous, Alignment> | ||||
| { | ||||
|  public: | ||||
|   typedef SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous, Alignment> ParentMapper; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   BaseTensorContractionMapper(const Tensor& tensor, | ||||
|                               const nocontract_t& nocontract_strides, | ||||
|                               const nocontract_t& ij_strides, | ||||
|                               const contract_t& contract_strides, | ||||
|                               const contract_t& k_strides) : | ||||
|   ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } | ||||
|  | ||||
|   typedef typename Tensor::PacketReturnType Packet; | ||||
|   template <int> EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const { | ||||
|     EIGEN_ALIGN_MAX Scalar data[1]; | ||||
|     data[0] = this->m_tensor.coeff(this->computeIndex(i, j)); | ||||
|     return pload<typename Tensor::PacketReturnType>(data); | ||||
|   } | ||||
|   template <int> EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE Packet loadHalfPacket(Index i, Index j) const { | ||||
|     return loadPacket(i, j); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| template<typename Scalar, typename Index, int side, | ||||
|          typename Tensor, | ||||
|          typename nocontract_t, typename contract_t, | ||||
|          int packet_size, | ||||
|          bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment> | ||||
| class TensorContractionSubMapper { | ||||
|  public: | ||||
|   typedef typename Tensor::PacketReturnType Packet; | ||||
|   typedef typename unpacket_traits<Packet>::half HalfPacket; | ||||
|  | ||||
|   typedef BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> ParentMapper; | ||||
|   typedef TensorContractionSubMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> Self; | ||||
|   typedef Self LinearMapper; | ||||
|  | ||||
|   enum { | ||||
|     // We can use direct offsets iff the parent mapper supports them and we can compute the strides. | ||||
|     // TODO: we should also enable direct offsets for the Rhs case. | ||||
|     UseDirectOffsets = ParentMapper::DirectOffsets && (side == Lhs) && inner_dim_contiguous && (array_size<contract_t>::value > 0) | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC TensorContractionSubMapper(const ParentMapper& base_mapper, Index vert_offset, Index horiz_offset) | ||||
|       : m_base_mapper(base_mapper), m_vert_offset(vert_offset), m_horiz_offset(horiz_offset) { | ||||
|     // Bake the offsets into the buffer used by the base mapper whenever possible. This avoids the need to recompute | ||||
|     // this offset every time we attempt to access a coefficient. | ||||
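|     // The sub-mapper's origin is translated into the linear offset | ||||
|     // vert_offset + horiz_offset * stride() and folded into the underlying buffer once, | ||||
|     // so subsequent accesses can use local (i, j) coordinates directly. | ||||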
|     if (UseDirectOffsets) { | ||||
|       Index stride = m_base_mapper.stride(); | ||||
|       m_base_mapper.offsetBuffer(vert_offset + horiz_offset * stride); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const { | ||||
|     if (UseDirectOffsets) { | ||||
|       return m_base_mapper(i, 0); | ||||
|     } | ||||
|     return m_base_mapper(i + m_vert_offset, m_horiz_offset); | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i, Index j) const { | ||||
|     if (UseDirectOffsets) { | ||||
|       return m_base_mapper(i, j); | ||||
|     } | ||||
|     return m_base_mapper(i + m_vert_offset, j + m_horiz_offset); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const { | ||||
|     if (UseDirectOffsets) { | ||||
|       return m_base_mapper.template loadPacket<Alignment>(i, 0); | ||||
|     } | ||||
|     return m_base_mapper.template loadPacket<Alignment>(i + m_vert_offset, m_horiz_offset); | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const { | ||||
|     if (UseDirectOffsets) { | ||||
|       return m_base_mapper.template loadPacket<Alignment>(i, j); | ||||
|     } | ||||
|     return m_base_mapper.template loadPacket<Alignment>(i + m_vert_offset, j + m_horiz_offset); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const { | ||||
|     if (UseDirectOffsets) { | ||||
|       return m_base_mapper.template loadHalfPacket<Alignment>(i, 0); | ||||
|     } | ||||
|     return m_base_mapper.template loadHalfPacket<Alignment>(i + m_vert_offset, m_horiz_offset); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, Packet p) const { | ||||
|     if (UseDirectOffsets) { | ||||
|       m_base_mapper.storePacket(i, 0, p); | ||||
|       return; | ||||
|     } | ||||
|     m_base_mapper.storePacket(i + m_vert_offset, m_horiz_offset, p); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const { | ||||
|     if (UseDirectOffsets) { | ||||
|       return LinearMapper(m_base_mapper, i, j); | ||||
|     } | ||||
|     return LinearMapper(m_base_mapper, i + m_vert_offset, j + m_horiz_offset); | ||||
|   } | ||||
|  | ||||
|   template <typename PacketT, int AlignmentType> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i) const { | ||||
|     EIGEN_STATIC_ASSERT((internal::is_same<PacketT, Packet>::value), YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|     const int ActualAlignment = (AlignmentType == Aligned) && (Alignment == Aligned) ? Aligned : Unaligned; | ||||
|     if (UseDirectOffsets) { | ||||
|      return m_base_mapper.template loadPacket<ActualAlignment>(i, 0); | ||||
|     } | ||||
|     return m_base_mapper.template loadPacket<ActualAlignment>(i + m_vert_offset, m_horiz_offset); | ||||
|   } | ||||
|  | ||||
|   template <typename Packet> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool aligned(Index) const { | ||||
|     return false; | ||||
|   } | ||||
|  | ||||
|  private: | ||||
|   ParentMapper m_base_mapper; | ||||
|   const Index m_vert_offset; | ||||
|   const Index m_horiz_offset; | ||||
| }; | ||||
|  | ||||
|  | ||||
| template<typename Scalar_, typename Index, int side, | ||||
|          typename Tensor, | ||||
|          typename nocontract_t, typename contract_t, | ||||
|          int packet_size, | ||||
|          bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment> | ||||
| class TensorContractionInputMapper | ||||
|   : public BaseTensorContractionMapper<Scalar_, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> { | ||||
|  | ||||
|  public: | ||||
|   typedef Scalar_ Scalar; | ||||
|   typedef BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> Base; | ||||
|   typedef TensorContractionSubMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> SubMapper; | ||||
|   typedef SubMapper VectorMapper; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC TensorContractionInputMapper(const Tensor& tensor, | ||||
|                                const nocontract_t& nocontract_strides, | ||||
|                                const nocontract_t& ij_strides, | ||||
|                                const contract_t& contract_strides, | ||||
|                                const contract_t& k_strides) | ||||
|       : Base(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE SubMapper getSubMapper(Index i, Index j) const { | ||||
|     return SubMapper(*this, i, j); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const { | ||||
|     return VectorMapper(*this, i, j); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
|  | ||||
| }  // end namespace internal | ||||
| }  // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPER_H | ||||
							
								
								
									
1043  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h  vendored  Normal file  (file diff suppressed because it is too large)
279  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h  vendored  Normal file
							| @@ -0,0 +1,279 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \class TensorConversionOp | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Tensor conversion class. This class makes it possible to vectorize | ||||
|   * type casting operations when the number of scalars per packet in the source | ||||
|   * and the destination types differs. | ||||
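|   * | ||||
|   * A minimal usage sketch (tensor names and sizes are illustrative only): | ||||
|   * \code | ||||
|   * Eigen::Tensor<float, 2> a(3, 4); | ||||
|   * a.setRandom(); | ||||
|   * Eigen::Tensor<double, 2> b = a.cast<double>();  // builds a TensorConversionOp | ||||
|   * \endcode | ||||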
|   */ | ||||
| namespace internal { | ||||
| template<typename TargetType, typename XprType> | ||||
| struct traits<TensorConversionOp<TargetType, XprType> > | ||||
| { | ||||
|   // Type promotion to handle the case where the types of the lhs and the rhs are different. | ||||
|   typedef TargetType Scalar; | ||||
|   typedef typename traits<XprType>::StorageKind StorageKind; | ||||
|   typedef typename traits<XprType>::Index Index; | ||||
|   typedef typename XprType::Nested Nested; | ||||
|   typedef typename remove_reference<Nested>::type _Nested; | ||||
|   static const int NumDimensions = traits<XprType>::NumDimensions; | ||||
|   static const int Layout = traits<XprType>::Layout; | ||||
|   enum { Flags = 0 }; | ||||
| }; | ||||
|  | ||||
| template<typename TargetType, typename XprType> | ||||
| struct eval<TensorConversionOp<TargetType, XprType>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorConversionOp<TargetType, XprType>& type; | ||||
| }; | ||||
|  | ||||
| template<typename TargetType, typename XprType> | ||||
| struct nested<TensorConversionOp<TargetType, XprType>, 1, typename eval<TensorConversionOp<TargetType, XprType> >::type> | ||||
| { | ||||
|   typedef TensorConversionOp<TargetType, XprType> type; | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
|  | ||||
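| // PacketConverter bridges the packet-size mismatch between the source and target scalar | ||||
| // types: depending on the SrcCoeffRatio/TgtCoeffRatio pair it gathers one, two or four | ||||
| // source packets and casts them into a single target packet, or, for the 1:2 case, | ||||
| // converts from a single source packet when direct data access is available and falls | ||||
| // back to a scalar conversion loop otherwise. | ||||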
| template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket, int SrcCoeffRatio, int TgtCoeffRatio> | ||||
| struct PacketConverter { | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   PacketConverter(const TensorEvaluator& impl) | ||||
|       : m_impl(impl) {} | ||||
|  | ||||
|   template<int LoadMode, typename Index> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const { | ||||
|     return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<LoadMode>(index)); | ||||
|   } | ||||
|  | ||||
|  private: | ||||
|   const TensorEvaluator& m_impl; | ||||
| }; | ||||
|  | ||||
|  | ||||
| template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket> | ||||
| struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 2, 1> { | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   PacketConverter(const TensorEvaluator& impl) | ||||
|       : m_impl(impl) {} | ||||
|  | ||||
|   template<int LoadMode, typename Index> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const { | ||||
|     const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size; | ||||
|  | ||||
|     SrcPacket src1 = m_impl.template packet<LoadMode>(index); | ||||
|     SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize); | ||||
|     TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2); | ||||
|     return result; | ||||
|   } | ||||
|  | ||||
|  private: | ||||
|   const TensorEvaluator& m_impl; | ||||
| }; | ||||
|  | ||||
| template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket> | ||||
| struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 4, 1> { | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   PacketConverter(const TensorEvaluator& impl) | ||||
|       : m_impl(impl) {} | ||||
|  | ||||
|   template<int LoadMode, typename Index> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const { | ||||
|     const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size; | ||||
|  | ||||
|     SrcPacket src1 = m_impl.template packet<LoadMode>(index); | ||||
|     SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize); | ||||
|     SrcPacket src3 = m_impl.template packet<LoadMode>(index + 2 * SrcPacketSize); | ||||
|     SrcPacket src4 = m_impl.template packet<LoadMode>(index + 3 * SrcPacketSize); | ||||
|     TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2, src3, src4); | ||||
|     return result; | ||||
|   } | ||||
|  | ||||
|  private: | ||||
|   const TensorEvaluator& m_impl; | ||||
| }; | ||||
|  | ||||
| template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket> | ||||
| struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 2> { | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   PacketConverter(const TensorEvaluator& impl) | ||||
|       : m_impl(impl), m_maxIndex(impl.dimensions().TotalSize()) {} | ||||
|  | ||||
|   template<int LoadMode, typename Index> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const { | ||||
|     const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size; | ||||
|     // Only call m_impl.packet() when we have direct access to the underlying data. This | ||||
|     // ensures that we don't compute the subexpression twice. We may however load some | ||||
|     // coefficients twice, but in practice this doesn't negatively impact performance. | ||||
|     if (m_impl.data() && (index + SrcPacketSize < m_maxIndex)) { | ||||
|       // Force unaligned memory loads since we can't ensure alignment anymore | ||||
|       return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<Unaligned>(index)); | ||||
|     } else { | ||||
|       const int TgtPacketSize = internal::unpacket_traits<TgtPacket>::size; | ||||
|       typedef typename internal::unpacket_traits<SrcPacket>::type SrcType; | ||||
|       typedef typename internal::unpacket_traits<TgtPacket>::type TgtType; | ||||
|       internal::scalar_cast_op<SrcType, TgtType> converter; | ||||
|       EIGEN_ALIGN_MAX typename internal::unpacket_traits<TgtPacket>::type values[TgtPacketSize]; | ||||
|       for (int i = 0; i < TgtPacketSize; ++i) { | ||||
|         values[i] = converter(m_impl.coeff(index+i)); | ||||
|       } | ||||
|       TgtPacket rslt = internal::pload<TgtPacket>(values); | ||||
|       return rslt; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|  private: | ||||
|   const TensorEvaluator& m_impl; | ||||
|   const typename TensorEvaluator::Index m_maxIndex; | ||||
| }; | ||||
|  | ||||
| template<typename TargetType, typename XprType> | ||||
| class TensorConversionOp : public TensorBase<TensorConversionOp<TargetType, XprType>, ReadOnlyAccessors> | ||||
| { | ||||
|   public: | ||||
|     typedef typename internal::traits<TensorConversionOp>::Scalar Scalar; | ||||
|     typedef typename internal::traits<TensorConversionOp>::StorageKind StorageKind; | ||||
|     typedef typename internal::traits<TensorConversionOp>::Index Index; | ||||
|     typedef typename internal::nested<TensorConversionOp>::type Nested; | ||||
|     typedef Scalar CoeffReturnType; | ||||
|     typedef typename NumTraits<Scalar>::Real RealScalar; | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConversionOp(const XprType& xpr) | ||||
|         : m_xpr(xpr) {} | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const typename internal::remove_all<typename XprType::Nested>::type& | ||||
|     expression() const { return m_xpr; } | ||||
|  | ||||
|   protected: | ||||
|     typename XprType::Nested m_xpr; | ||||
| }; | ||||
|  | ||||
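| // When the source and target scalar types are identical no conversion is needed, so the | ||||
| // sub-expression can evaluate directly into the destination buffer (the specialization | ||||
| // below); otherwise it is evaluated on its own and converted coefficient by coefficient. | ||||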
| template <bool SameType, typename Eval, typename Scalar> struct ConversionSubExprEval { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, Scalar*) { | ||||
|     impl.evalSubExprsIfNeeded(NULL); | ||||
|     return true; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename Eval, typename Scalar> struct ConversionSubExprEval<true, Eval, Scalar> { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, Scalar* data) { | ||||
|     return impl.evalSubExprsIfNeeded(data); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| // Eval as rvalue | ||||
| template<typename TargetType, typename ArgType, typename Device> | ||||
| struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device> | ||||
| { | ||||
|   typedef TensorConversionOp<TargetType, ArgType> XprType; | ||||
|   typedef typename XprType::Index Index; | ||||
|   typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions; | ||||
|   typedef TargetType Scalar; | ||||
|   typedef TargetType CoeffReturnType; | ||||
|   typedef typename internal::remove_all<typename internal::traits<ArgType>::Scalar>::type SrcType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   typedef typename PacketType<SrcType, Device>::type PacketSourceType; | ||||
|   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = false, | ||||
|     PacketAccess = true, | ||||
|     Layout = TensorEvaluator<ArgType, Device>::Layout, | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) | ||||
|     : m_impl(op.expression(), device) | ||||
|   { | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_impl.dimensions(); } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) | ||||
|   { | ||||
|     return ConversionSubExprEval<internal::is_same<TargetType, SrcType>::value, TensorEvaluator<ArgType, Device>, Scalar>::run(m_impl, data); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() | ||||
|   { | ||||
|     m_impl.cleanup(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const | ||||
|   { | ||||
|     internal::scalar_cast_op<SrcType, TargetType> converter; | ||||
|     return converter(m_impl.coeff(index)); | ||||
|   } | ||||
|  | ||||
|   template<int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const | ||||
|   { | ||||
|     const bool Vectorizable = TensorEvaluator<ArgType, Device>::PacketAccess & | ||||
|         internal::type_casting_traits<SrcType, TargetType>::VectorizedCast; | ||||
|     return PacketConv<LoadMode, Vectorizable>::run(m_impl, index); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost | ||||
|   costPerCoeff(bool vectorized) const { | ||||
|     const double cast_cost = TensorOpCost::CastCost<SrcType, TargetType>(); | ||||
|     if (vectorized) { | ||||
|       const double SrcCoeffRatio = | ||||
|           internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio; | ||||
|       const double TgtCoeffRatio = | ||||
|           internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio; | ||||
|       return m_impl.costPerCoeff(vectorized) * (SrcCoeffRatio / PacketSize) + | ||||
|           TensorOpCost(0, 0, TgtCoeffRatio * (cast_cost / PacketSize)); | ||||
|     } else { | ||||
|       return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, cast_cost); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } | ||||
|  | ||||
|   protected: | ||||
|   template <int LoadMode, bool ActuallyVectorize> | ||||
|   struct PacketConv { | ||||
|     static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator<ArgType, Device>& impl, Index index) { | ||||
|       internal::scalar_cast_op<SrcType, TargetType> converter; | ||||
|       EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; | ||||
|       for (int i = 0; i < PacketSize; ++i) { | ||||
|         values[i] = converter(impl.coeff(index+i)); | ||||
|       } | ||||
|       PacketReturnType rslt = internal::pload<PacketReturnType>(values); | ||||
|       return rslt; | ||||
|     } | ||||
|   }; | ||||
|  | ||||
|   template <int LoadMode> | ||||
|   struct PacketConv<LoadMode, true> { | ||||
|     static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator<ArgType, Device>& impl, Index index) { | ||||
|       const int SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio; | ||||
|       const int TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio; | ||||
|       PacketConverter<TensorEvaluator<ArgType, Device>, PacketSourceType, PacketReturnType, | ||||
|                       SrcCoeffRatio, TgtCoeffRatio> converter(impl); | ||||
|       return converter.template packet<LoadMode>(index); | ||||
|     } | ||||
|   }; | ||||
|  | ||||
|   TensorEvaluator<ArgType, Device> m_impl; | ||||
| }; | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H | ||||
							
								
								
									
1104  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h  vendored  Normal file  (file diff suppressed because it is too large)
212  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h  vendored  Normal file
							| @@ -0,0 +1,212 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2016 Rasmus Munk Larsen <rmlarsen@google.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_COST_MODEL_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_COST_MODEL_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \class TensorCostModel | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief A cost model used to limit the number of threads used for evaluating | ||||
|   * tensor expressions. | ||||
|   * | ||||
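|   * A minimal sketch of how the model is typically queried (the evaluator, device and | ||||
|   * size names below are placeholders): | ||||
|   * \code | ||||
|   * int num_threads = Eigen::TensorCostModel<Eigen::ThreadPoolDevice>::numThreads( | ||||
|   *     output_size, evaluator.costPerCoeff(true), device.numThreads()); | ||||
|   * \endcode | ||||
|   * | ||||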
|   */ | ||||
|  | ||||
| // Class storing the cost of evaluating a tensor expression in terms of the | ||||
| // estimated number of operand bytes loaded, bytes stored, and compute cycles. | ||||
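| // For example, an evaluator whose coefficients are simply read back from a precomputed | ||||
| // result buffer reports TensorOpCost(sizeof(CoeffReturnType), 0, 0): one coefficient | ||||
| // load, no stores and no arithmetic (see the contraction evaluator's costPerCoeff above). | ||||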
| class TensorOpCost { | ||||
|  public: | ||||
|   // TODO(rmlarsen): Fix the scalar op costs in Eigen proper. Even a simple | ||||
|   // model based on minimal reciprocal throughput numbers from Intel or | ||||
|   // Agner Fog's tables would be better than what is there now. | ||||
|   template <typename ArgType> | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int MulCost() { | ||||
|     return internal::functor_traits< | ||||
|         internal::scalar_product_op<ArgType, ArgType> >::Cost; | ||||
|   } | ||||
|   template <typename ArgType> | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int AddCost() { | ||||
|     return internal::functor_traits<internal::scalar_sum_op<ArgType> >::Cost; | ||||
|   } | ||||
|   template <typename ArgType> | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int DivCost() { | ||||
|     return internal::functor_traits< | ||||
|         internal::scalar_quotient_op<ArgType, ArgType> >::Cost; | ||||
|   } | ||||
|   template <typename ArgType> | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int ModCost() { | ||||
|     return internal::functor_traits<internal::scalar_mod_op<ArgType> >::Cost; | ||||
|   } | ||||
|   template <typename SrcType, typename TargetType> | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int CastCost() { | ||||
|     return internal::functor_traits< | ||||
|         internal::scalar_cast_op<SrcType, TargetType> >::Cost; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   TensorOpCost() : bytes_loaded_(0), bytes_stored_(0), compute_cycles_(0) {} | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   TensorOpCost(double bytes_loaded, double bytes_stored, double compute_cycles) | ||||
|       : bytes_loaded_(bytes_loaded), | ||||
|         bytes_stored_(bytes_stored), | ||||
|         compute_cycles_(compute_cycles) {} | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   TensorOpCost(double bytes_loaded, double bytes_stored, double compute_cycles, | ||||
|                bool vectorized, double packet_size) | ||||
|       : bytes_loaded_(bytes_loaded), | ||||
|         bytes_stored_(bytes_stored), | ||||
|         compute_cycles_(vectorized ? compute_cycles / packet_size | ||||
|                                    : compute_cycles) { | ||||
|     eigen_assert(bytes_loaded >= 0 && (numext::isfinite)(bytes_loaded)); | ||||
|     eigen_assert(bytes_stored >= 0 && (numext::isfinite)(bytes_stored)); | ||||
|     eigen_assert(compute_cycles >= 0 && (numext::isfinite)(compute_cycles)); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bytes_loaded() const { | ||||
|     return bytes_loaded_; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bytes_stored() const { | ||||
|     return bytes_stored_; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double compute_cycles() const { | ||||
|     return compute_cycles_; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double total_cost( | ||||
|       double load_cost, double store_cost, double compute_cost) const { | ||||
|     return load_cost * bytes_loaded_ + store_cost * bytes_stored_ + | ||||
|            compute_cost * compute_cycles_; | ||||
|   } | ||||
|  | ||||
|   // Drop memory access component. Intended for cases when memory accesses are | ||||
|   // sequential or are completely masked by computations. | ||||
|   EIGEN_DEVICE_FUNC void dropMemoryCost() { | ||||
|     bytes_loaded_ = 0; | ||||
|     bytes_stored_ = 0; | ||||
|   } | ||||
|  | ||||
|   // TODO(rmlarsen): Define min in terms of total cost, not elementwise. | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost cwiseMin( | ||||
|       const TensorOpCost& rhs) const { | ||||
|     double bytes_loaded = numext::mini(bytes_loaded_, rhs.bytes_loaded()); | ||||
|     double bytes_stored = numext::mini(bytes_stored_, rhs.bytes_stored()); | ||||
|     double compute_cycles = numext::mini(compute_cycles_, rhs.compute_cycles()); | ||||
|     return TensorOpCost(bytes_loaded, bytes_stored, compute_cycles); | ||||
|   } | ||||
|  | ||||
|   // TODO(rmlarsen): Define max in terms of total cost, not elementwise. | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost cwiseMax( | ||||
|       const TensorOpCost& rhs) const { | ||||
|     double bytes_loaded = numext::maxi(bytes_loaded_, rhs.bytes_loaded()); | ||||
|     double bytes_stored = numext::maxi(bytes_stored_, rhs.bytes_stored()); | ||||
|     double compute_cycles = numext::maxi(compute_cycles_, rhs.compute_cycles()); | ||||
|     return TensorOpCost(bytes_loaded, bytes_stored, compute_cycles); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost& operator+=( | ||||
|       const TensorOpCost& rhs) { | ||||
|     bytes_loaded_ += rhs.bytes_loaded(); | ||||
|     bytes_stored_ += rhs.bytes_stored(); | ||||
|     compute_cycles_ += rhs.compute_cycles(); | ||||
|     return *this; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost& operator*=(double rhs) { | ||||
|     bytes_loaded_ *= rhs; | ||||
|     bytes_stored_ *= rhs; | ||||
|     compute_cycles_ *= rhs; | ||||
|     return *this; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend TensorOpCost operator+( | ||||
|       TensorOpCost lhs, const TensorOpCost& rhs) { | ||||
|     lhs += rhs; | ||||
|     return lhs; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend TensorOpCost operator*( | ||||
|       TensorOpCost lhs, double rhs) { | ||||
|     lhs *= rhs; | ||||
|     return lhs; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend TensorOpCost operator*( | ||||
|       double lhs, TensorOpCost rhs) { | ||||
|     rhs *= lhs; | ||||
|     return rhs; | ||||
|   } | ||||
|  | ||||
|   friend std::ostream& operator<<(std::ostream& os, const TensorOpCost& tc) { | ||||
|     return os << "[bytes_loaded = " << tc.bytes_loaded() | ||||
|               << ", bytes_stored = " << tc.bytes_stored() | ||||
|               << ", compute_cycles = " << tc.compute_cycles() << "]"; | ||||
|   } | ||||
|  | ||||
|  private: | ||||
|   double bytes_loaded_; | ||||
|   double bytes_stored_; | ||||
|   double compute_cycles_; | ||||
| }; | ||||
|  | ||||
| // TODO(rmlarsen): Implement a policy that chooses an "optimal" number of threads | ||||
| // in [1:max_threads] instead of just switching multi-threading off for small | ||||
| // work units. | ||||
| template <typename Device> | ||||
| class TensorCostModel { | ||||
|  public: | ||||
|   // Scaling from Eigen compute cost to device cycles. | ||||
|   static const int kDeviceCyclesPerComputeCycle = 1; | ||||
|  | ||||
|  // Costs in device cycles. | ||||
|   static const int kStartupCycles = 100000; | ||||
|   static const int kPerThreadCycles = 100000; | ||||
|   static const int kTaskSize = 40000; | ||||
|  | ||||
|   // Returns the number of threads in [1:max_threads] to use for | ||||
|   // evaluating an expression with the given output size and cost per | ||||
|   // coefficient. | ||||
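|   // For example, with the default constants a total cost of 350000 device cycles gives | ||||
|   // (350000 - 100000) / 100000 + 0.9 = 3.4, i.e. at most 3 threads before clamping to | ||||
|   // [1:max_threads]. | ||||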
|   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int numThreads( | ||||
|       double output_size, const TensorOpCost& cost_per_coeff, int max_threads) { | ||||
|     double cost = totalCost(output_size, cost_per_coeff); | ||||
|     int threads = (cost - kStartupCycles) / kPerThreadCycles + 0.9; | ||||
|     return numext::mini(max_threads, numext::maxi(1, threads)); | ||||
|   } | ||||
|  | ||||
|   // taskSize assesses parallel task size. | ||||
|   // Value of 1.0 means ideal parallel task size. Values < 1.0 mean that task | ||||
|   // granularity needs to be increased to mitigate parallelization overheads. | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double taskSize( | ||||
|       double output_size, const TensorOpCost& cost_per_coeff) { | ||||
|     return totalCost(output_size, cost_per_coeff) / kTaskSize; | ||||
|   } | ||||
|  | ||||
|  private: | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double totalCost( | ||||
|       double output_size, const TensorOpCost& cost_per_coeff) { | ||||
|     // Cost of memory fetches from the L2 cache. 64 is the typical cache line | ||||
|     // size; 11 cycles is the L2 cache latency on Haswell. | ||||
|     // We don't know whether the data is in L1, L2 or L3, but we are most | ||||
|     // interested in single-threaded computation times of roughly 100us-10ms | ||||
|     // (shorter times are too small to be worth parallelizing, longer times are | ||||
|     // not interesting either because we are probably already using all | ||||
|     // available threads). For that target time range, L2 seems to be what | ||||
|     // matters: a data set fitting into L1 is too small to take noticeable | ||||
|     // time, and a data set fitting only into L3 will presumably take more | ||||
|     // than 10ms to load and process. | ||||
|     const double kLoadCycles = 1.0 / 64 * 11; | ||||
|     const double kStoreCycles = 1.0 / 64 * 11; | ||||
|     // Scaling from Eigen compute cost to device cycles. | ||||
|     return output_size * | ||||
|         cost_per_coeff.total_cost(kLoadCycles, kStoreCycles, | ||||
|                                   kDeviceCyclesPerComputeCycle); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| }  // namespace Eigen | ||||
|  | ||||
| #endif  // EIGEN_CXX11_TENSOR_TENSOR_COST_MODEL_H | ||||
							
								
								
									
313 external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h vendored Normal file
							| @@ -0,0 +1,313 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \class TensorCustomUnaryOp | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Tensor expression that applies a user-provided custom operation to | ||||
|   * a single input expression. | ||||
|   * | ||||
|   */ | ||||
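| // The evaluator below expects the custom functor to provide two members: | ||||
| // dimensions(input), returning the output shape, and eval(input, output, device), | ||||
| // filling the result. A minimal sketch (the functor name and tensor rank are | ||||
| // illustrative, not part of this header): | ||||
| // | ||||
| //   struct DoubleFunc { | ||||
| //     Eigen::DSizes<Eigen::DenseIndex, 2> dimensions(const Eigen::Tensor<float, 2>& input) const { | ||||
| //       return input.dimensions(); | ||||
| //     } | ||||
| //     template <typename Output, typename Device> | ||||
| //     void eval(const Eigen::Tensor<float, 2>& input, Output& output, const Device& device) const { | ||||
| //       output.device(device) = input * 2.0f; | ||||
| //     } | ||||
| //   }; | ||||
| // | ||||
| // Such a functor is typically applied through tensor.customOp(DoubleFunc()). | ||||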
| namespace internal { | ||||
| template<typename CustomUnaryFunc, typename XprType> | ||||
| struct traits<TensorCustomUnaryOp<CustomUnaryFunc, XprType> > | ||||
| { | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename XprType::StorageKind StorageKind; | ||||
|   typedef typename XprType::Index Index; | ||||
|   typedef typename XprType::Nested Nested; | ||||
|   typedef typename remove_reference<Nested>::type _Nested; | ||||
|   static const int NumDimensions = traits<XprType>::NumDimensions; | ||||
|   static const int Layout = traits<XprType>::Layout; | ||||
| }; | ||||
|  | ||||
| template<typename CustomUnaryFunc, typename XprType> | ||||
| struct eval<TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorCustomUnaryOp<CustomUnaryFunc, XprType>& type; | ||||
| }; | ||||
|  | ||||
| template<typename CustomUnaryFunc, typename XprType> | ||||
| struct nested<TensorCustomUnaryOp<CustomUnaryFunc, XprType> > | ||||
| { | ||||
|   typedef TensorCustomUnaryOp<CustomUnaryFunc, XprType> type; | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
|  | ||||
|  | ||||
| template<typename CustomUnaryFunc, typename XprType> | ||||
| class TensorCustomUnaryOp : public TensorBase<TensorCustomUnaryOp<CustomUnaryFunc, XprType>, ReadOnlyAccessors> | ||||
| { | ||||
|   public: | ||||
|   typedef typename internal::traits<TensorCustomUnaryOp>::Scalar Scalar; | ||||
|   typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename internal::nested<TensorCustomUnaryOp>::type Nested; | ||||
|   typedef typename internal::traits<TensorCustomUnaryOp>::StorageKind StorageKind; | ||||
|   typedef typename internal::traits<TensorCustomUnaryOp>::Index Index; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCustomUnaryOp(const XprType& expr, const CustomUnaryFunc& func) | ||||
|       : m_expr(expr), m_func(func) {} | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   const CustomUnaryFunc& func() const { return m_func; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   const typename internal::remove_all<typename XprType::Nested>::type& | ||||
|   expression() const { return m_expr; } | ||||
|  | ||||
|   protected: | ||||
|     typename XprType::Nested m_expr; | ||||
|     const CustomUnaryFunc m_func; | ||||
| }; | ||||
|  | ||||
|  | ||||
| // Eval as rvalue | ||||
| template<typename CustomUnaryFunc, typename XprType, typename Device> | ||||
| struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Device> | ||||
| { | ||||
|   typedef TensorCustomUnaryOp<CustomUnaryFunc, XprType> ArgType; | ||||
|   typedef typename internal::traits<ArgType>::Index Index; | ||||
|   static const int NumDims = internal::traits<ArgType>::NumDimensions; | ||||
|   typedef DSizes<Index, NumDims> Dimensions; | ||||
|   typedef typename internal::remove_const<typename ArgType::Scalar>::type Scalar; | ||||
|   typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = false, | ||||
|     PacketAccess = (internal::packet_traits<Scalar>::size > 1), | ||||
|     BlockAccess = false, | ||||
|     Layout = TensorEvaluator<XprType, Device>::Layout, | ||||
|     CoordAccess = false,  // to be implemented | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const ArgType& op, const Device& device) | ||||
|       : m_op(op), m_device(device), m_result(NULL) | ||||
|   { | ||||
|     m_dimensions = op.func().dimensions(op.expression()); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { | ||||
|     if (data) { | ||||
|       evalTo(data); | ||||
|       return false; | ||||
|     } else { | ||||
|       m_result = static_cast<CoeffReturnType*>( | ||||
|           m_device.allocate(dimensions().TotalSize() * sizeof(Scalar))); | ||||
|       evalTo(m_result); | ||||
|       return true; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { | ||||
|     if (m_result != NULL) { | ||||
|       m_device.deallocate(m_result); | ||||
|       m_result = NULL; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { | ||||
|     return m_result[index]; | ||||
|   } | ||||
|  | ||||
|   template<int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const { | ||||
|     return internal::ploadt<PacketReturnType, LoadMode>(m_result + index); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { | ||||
|     // TODO(rmlarsen): Extend CustomOp API to return its cost estimate. | ||||
|     return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_result; } | ||||
|  | ||||
|  protected: | ||||
|   EIGEN_DEVICE_FUNC void evalTo(Scalar* data) { | ||||
|     TensorMap<Tensor<CoeffReturnType, NumDims, Layout, Index> > result( | ||||
|         data, m_dimensions); | ||||
|     m_op.func().eval(m_op.expression(), result, m_device); | ||||
|   } | ||||
|  | ||||
|   Dimensions m_dimensions; | ||||
|   const ArgType m_op; | ||||
|   const Device& m_device; | ||||
|   CoeffReturnType* m_result; | ||||
| }; | ||||
|  | ||||
|  | ||||
|  | ||||
| /** \class TensorCustomBinaryOp | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Tensor expression that applies a user-provided custom operation to | ||||
|   * two input expressions. | ||||
|   * | ||||
|   */ | ||||
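| // The binary counterpart of the sketch shown for TensorCustomUnaryOp: the | ||||
| // functor provides dimensions(lhs, rhs) and eval(lhs, rhs, output, device), | ||||
| // for example (illustrative names only): | ||||
| // | ||||
| //   struct AddFunc { | ||||
| //     Eigen::DSizes<Eigen::DenseIndex, 2> dimensions(const Eigen::Tensor<float, 2>& a, | ||||
| //                                                    const Eigen::Tensor<float, 2>& /*b*/) const { | ||||
| //       return a.dimensions(); | ||||
| //     } | ||||
| //     template <typename Output, typename Device> | ||||
| //     void eval(const Eigen::Tensor<float, 2>& a, const Eigen::Tensor<float, 2>& b, | ||||
| //               Output& output, const Device& device) const { | ||||
| //       output.device(device) = a + b; | ||||
| //     } | ||||
| //   }; | ||||
| // | ||||
| // and is typically applied through lhs.customOp(rhs, AddFunc()). | ||||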
| namespace internal { | ||||
| template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> | ||||
| struct traits<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> > | ||||
| { | ||||
|   typedef typename internal::promote_storage_type<typename LhsXprType::Scalar, | ||||
|                                                   typename RhsXprType::Scalar>::ret Scalar; | ||||
|   typedef typename internal::promote_storage_type<typename LhsXprType::CoeffReturnType, | ||||
|                                                   typename RhsXprType::CoeffReturnType>::ret CoeffReturnType; | ||||
|   typedef typename promote_storage_type<typename traits<LhsXprType>::StorageKind, | ||||
|                                         typename traits<RhsXprType>::StorageKind>::ret StorageKind; | ||||
|   typedef typename promote_index_type<typename traits<LhsXprType>::Index, | ||||
|                                       typename traits<RhsXprType>::Index>::type Index; | ||||
|   typedef typename LhsXprType::Nested LhsNested; | ||||
|   typedef typename RhsXprType::Nested RhsNested; | ||||
|   typedef typename remove_reference<LhsNested>::type _LhsNested; | ||||
|   typedef typename remove_reference<RhsNested>::type _RhsNested; | ||||
|   static const int NumDimensions = traits<LhsXprType>::NumDimensions; | ||||
|   static const int Layout = traits<LhsXprType>::Layout; | ||||
| }; | ||||
|  | ||||
| template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> | ||||
| struct eval<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>& type; | ||||
| }; | ||||
|  | ||||
| template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> | ||||
| struct nested<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> > | ||||
| { | ||||
|   typedef TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> type; | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
|  | ||||
|  | ||||
| template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> | ||||
| class TensorCustomBinaryOp : public TensorBase<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, ReadOnlyAccessors> | ||||
| { | ||||
|   public: | ||||
|   typedef typename internal::traits<TensorCustomBinaryOp>::Scalar Scalar; | ||||
|   typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; | ||||
|   typedef typename internal::traits<TensorCustomBinaryOp>::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename internal::nested<TensorCustomBinaryOp>::type Nested; | ||||
|   typedef typename internal::traits<TensorCustomBinaryOp>::StorageKind StorageKind; | ||||
|   typedef typename internal::traits<TensorCustomBinaryOp>::Index Index; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCustomBinaryOp(const LhsXprType& lhs, const RhsXprType& rhs, const CustomBinaryFunc& func) | ||||
|       : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_func(func) {} | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   const CustomBinaryFunc& func() const { return m_func; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   const typename internal::remove_all<typename LhsXprType::Nested>::type& | ||||
|   lhsExpression() const { return m_lhs_xpr; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   const typename internal::remove_all<typename RhsXprType::Nested>::type& | ||||
|   rhsExpression() const { return m_rhs_xpr; } | ||||
|  | ||||
|   protected: | ||||
|     typename LhsXprType::Nested m_lhs_xpr; | ||||
|     typename RhsXprType::Nested m_rhs_xpr; | ||||
|     const CustomBinaryFunc m_func; | ||||
| }; | ||||
|  | ||||
|  | ||||
| // Eval as rvalue | ||||
| template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType, typename Device> | ||||
| struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, Device> | ||||
| { | ||||
|   typedef TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> XprType; | ||||
|   typedef typename internal::traits<XprType>::Index Index; | ||||
|   static const int NumDims = internal::traits<XprType>::NumDimensions; | ||||
|   typedef DSizes<Index, NumDims> Dimensions; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = false, | ||||
|     PacketAccess = (internal::packet_traits<Scalar>::size > 1), | ||||
|     BlockAccess = false, | ||||
|     Layout = TensorEvaluator<LhsXprType, Device>::Layout, | ||||
|     CoordAccess = false,  // to be implemented | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) | ||||
|       : m_op(op), m_device(device), m_result(NULL) | ||||
|   { | ||||
|     m_dimensions = op.func().dimensions(op.lhsExpression(), op.rhsExpression()); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { | ||||
|     if (data) { | ||||
|       evalTo(data); | ||||
|       return false; | ||||
|     } else { | ||||
|       m_result = static_cast<Scalar *>(m_device.allocate(dimensions().TotalSize() * sizeof(Scalar))); | ||||
|       evalTo(m_result); | ||||
|       return true; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { | ||||
|     if (m_result != NULL) { | ||||
|       m_device.deallocate(m_result); | ||||
|       m_result = NULL; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { | ||||
|     return m_result[index]; | ||||
|   } | ||||
|  | ||||
|   template<int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const { | ||||
|     return internal::ploadt<PacketReturnType, LoadMode>(m_result + index); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { | ||||
|     // TODO(rmlarsen): Extend CustomOp API to return its cost estimate. | ||||
|     return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_result; } | ||||
|  | ||||
|  protected: | ||||
|   EIGEN_DEVICE_FUNC void evalTo(Scalar* data) { | ||||
|     TensorMap<Tensor<Scalar, NumDims, Layout> > result(data, m_dimensions); | ||||
|     m_op.func().eval(m_op.lhsExpression(), m_op.rhsExpression(), result, m_device); | ||||
|   } | ||||
|  | ||||
|   Dimensions m_dimensions; | ||||
|   const XprType m_op; | ||||
|   const Device& m_device; | ||||
|   CoeffReturnType* m_result; | ||||
| }; | ||||
|  | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H | ||||
							
								
								
									
68 external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h vendored Normal file
							| @@ -0,0 +1,68 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \class TensorDevice | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Pseudo expression providing an operator = that will evaluate its argument | ||||
|   * on the specified computing 'device' (GPU, thread pool, ...) | ||||
|   * | ||||
|   * Example: | ||||
|   *    C.device(EIGEN_GPU) = A + B; | ||||
|   * | ||||
|   * Todo: operator *= and /=. | ||||
|   */ | ||||
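| // Usage sketch, assuming a thread-pool device (any device type exposing the | ||||
| // same interface works the same way): | ||||
| // | ||||
| //   Eigen::ThreadPool pool(4); | ||||
| //   Eigen::ThreadPoolDevice my_device(&pool, /*num_cores=*/4); | ||||
| //   Eigen::Tensor<float, 2> A(100, 100), B(100, 100), C(100, 100); | ||||
| //   A.setRandom(); B.setRandom(); | ||||
| //   C.device(my_device) = A + B;  // evaluated on the thread pool | ||||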
|  | ||||
| template <typename ExpressionType, typename DeviceType> class TensorDevice { | ||||
|   public: | ||||
|     TensorDevice(const DeviceType& device, ExpressionType& expression) : m_device(device), m_expression(expression) {} | ||||
|  | ||||
|     template<typename OtherDerived> | ||||
|     EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) { | ||||
|       typedef TensorAssignOp<ExpressionType, const OtherDerived> Assign; | ||||
|       Assign assign(m_expression, other); | ||||
|       internal::TensorExecutor<const Assign, DeviceType>::run(assign, m_device); | ||||
|       return *this; | ||||
|     } | ||||
|  | ||||
|     template<typename OtherDerived> | ||||
|     EIGEN_STRONG_INLINE TensorDevice& operator+=(const OtherDerived& other) { | ||||
|       typedef typename OtherDerived::Scalar Scalar; | ||||
|       typedef TensorCwiseBinaryOp<internal::scalar_sum_op<Scalar>, const ExpressionType, const OtherDerived> Sum; | ||||
|       Sum sum(m_expression, other); | ||||
|       typedef TensorAssignOp<ExpressionType, const Sum> Assign; | ||||
|       Assign assign(m_expression, sum); | ||||
|       internal::TensorExecutor<const Assign, DeviceType>::run(assign, m_device); | ||||
|       return *this; | ||||
|     } | ||||
|  | ||||
|     template<typename OtherDerived> | ||||
|     EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) { | ||||
|       typedef typename OtherDerived::Scalar Scalar; | ||||
|       typedef TensorCwiseBinaryOp<internal::scalar_difference_op<Scalar>, const ExpressionType, const OtherDerived> Difference; | ||||
|       Difference difference(m_expression, other); | ||||
|       typedef TensorAssignOp<ExpressionType, const Difference> Assign; | ||||
|       Assign assign(m_expression, difference); | ||||
|       internal::TensorExecutor<const Assign, DeviceType>::run(assign, m_device); | ||||
|       return *this; | ||||
|     } | ||||
|  | ||||
|   protected: | ||||
|     const DeviceType& m_device; | ||||
|     ExpressionType& m_expression; | ||||
| }; | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H | ||||
							
								
								
									
337 external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h vendored Normal file
							| @@ -0,0 +1,337 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #if defined(EIGEN_USE_GPU) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H) | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| static const int kCudaScratchSize = 1024; | ||||
|  | ||||
| // This defines an interface that GPUDevice can take to use | ||||
| // CUDA streams underneath. | ||||
| class StreamInterface { | ||||
|  public: | ||||
|   virtual ~StreamInterface() {} | ||||
|  | ||||
|   virtual const cudaStream_t& stream() const = 0; | ||||
|   virtual const cudaDeviceProp& deviceProperties() const = 0; | ||||
|  | ||||
|   // Allocate memory on the actual device where the computation will run | ||||
|   virtual void* allocate(size_t num_bytes) const = 0; | ||||
|   virtual void deallocate(void* buffer) const = 0; | ||||
|  | ||||
|   // Return a scratchpad buffer of size 1k | ||||
|   virtual void* scratchpad() const = 0; | ||||
|  | ||||
|   // Return a semaphore. The semaphore is initialized to 0, and each kernel | ||||
|   // using it is responsible for resetting it to 0 upon completion, to | ||||
|   // maintain the invariant that the semaphore is always equal to 0 at | ||||
|   // kernel start. | ||||
|   virtual unsigned int* semaphore() const = 0; | ||||
| }; | ||||
|  | ||||
| static cudaDeviceProp* m_deviceProperties; | ||||
| static bool m_devicePropInitialized = false; | ||||
|  | ||||
| static void initializeDeviceProp() { | ||||
|   if (!m_devicePropInitialized) { | ||||
|     // Attempts to ensure proper behavior when multiple threads call this | ||||
|     // function simultaneously. This would be trivial to implement if we could | ||||
|     // use std::mutex, but unfortunately mutexes don't compile with nvcc, so we | ||||
|     // resort to atomics and thread fences instead. Note that if the caller | ||||
|     // uses a compiler that doesn't support C++11 we can't ensure that the | ||||
|     // initialization is thread-safe. | ||||
| #if __cplusplus >= 201103L | ||||
|     static std::atomic<bool> first(true); | ||||
|     if (first.exchange(false)) { | ||||
| #else | ||||
|     static bool first = true; | ||||
|     if (first) { | ||||
|       first = false; | ||||
| #endif | ||||
|       // We're the first thread to reach this point. | ||||
|       int num_devices; | ||||
|       cudaError_t status = cudaGetDeviceCount(&num_devices); | ||||
|       if (status != cudaSuccess) { | ||||
|         std::cerr << "Failed to get the number of CUDA devices: " | ||||
|                   << cudaGetErrorString(status) | ||||
|                   << std::endl; | ||||
|         assert(status == cudaSuccess); | ||||
|       } | ||||
|       m_deviceProperties = new cudaDeviceProp[num_devices]; | ||||
|       for (int i = 0; i < num_devices; ++i) { | ||||
|         status = cudaGetDeviceProperties(&m_deviceProperties[i], i); | ||||
|         if (status != cudaSuccess) { | ||||
|           std::cerr << "Failed to initialize CUDA device #" | ||||
|                     << i | ||||
|                     << ": " | ||||
|                     << cudaGetErrorString(status) | ||||
|                     << std::endl; | ||||
|           assert(status == cudaSuccess); | ||||
|         } | ||||
|       } | ||||
|  | ||||
| #if __cplusplus >= 201103L | ||||
|       std::atomic_thread_fence(std::memory_order_release); | ||||
| #endif | ||||
|       m_devicePropInitialized = true; | ||||
|     } else { | ||||
|       // Wait for the other thread to initialize the properties. | ||||
|       while (!m_devicePropInitialized) { | ||||
| #if __cplusplus >= 201103L | ||||
|         std::atomic_thread_fence(std::memory_order_acquire); | ||||
| #endif | ||||
|         sleep(1); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| static const cudaStream_t default_stream = cudaStreamDefault; | ||||
|  | ||||
| class CudaStreamDevice : public StreamInterface { | ||||
|  public: | ||||
|   // Use the default stream on the current device | ||||
|   CudaStreamDevice() : stream_(&default_stream), scratch_(NULL), semaphore_(NULL) { | ||||
|     cudaGetDevice(&device_); | ||||
|     initializeDeviceProp(); | ||||
|   } | ||||
|   // Use the default stream on the specified device | ||||
|   CudaStreamDevice(int device) : stream_(&default_stream), device_(device), scratch_(NULL), semaphore_(NULL) { | ||||
|     initializeDeviceProp(); | ||||
|   } | ||||
|   // Use the specified stream. Note that it's the caller's responsibility to | ||||
|   // ensure that the stream can run on the specified device. If no device is | ||||
|   // specified, the code assumes that the stream is associated with the | ||||
|   // current GPU device. | ||||
|   CudaStreamDevice(const cudaStream_t* stream, int device = -1) | ||||
|       : stream_(stream), device_(device), scratch_(NULL), semaphore_(NULL) { | ||||
|     if (device < 0) { | ||||
|       cudaGetDevice(&device_); | ||||
|     } else { | ||||
|       int num_devices; | ||||
|       cudaError_t err = cudaGetDeviceCount(&num_devices); | ||||
|       EIGEN_UNUSED_VARIABLE(err) | ||||
|       assert(err == cudaSuccess); | ||||
|       assert(device < num_devices); | ||||
|       device_ = device; | ||||
|     } | ||||
|     initializeDeviceProp(); | ||||
|   } | ||||
|  | ||||
|   virtual ~CudaStreamDevice() { | ||||
|     if (scratch_) { | ||||
|       deallocate(scratch_); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   const cudaStream_t& stream() const { return *stream_; } | ||||
|   const cudaDeviceProp& deviceProperties() const { | ||||
|     return m_deviceProperties[device_]; | ||||
|   } | ||||
|   virtual void* allocate(size_t num_bytes) const { | ||||
|     cudaError_t err = cudaSetDevice(device_); | ||||
|     EIGEN_UNUSED_VARIABLE(err) | ||||
|     assert(err == cudaSuccess); | ||||
|     void* result; | ||||
|     err = cudaMalloc(&result, num_bytes); | ||||
|     assert(err == cudaSuccess); | ||||
|     assert(result != NULL); | ||||
|     return result; | ||||
|   } | ||||
|   virtual void deallocate(void* buffer) const { | ||||
|     cudaError_t err = cudaSetDevice(device_); | ||||
|     EIGEN_UNUSED_VARIABLE(err) | ||||
|     assert(err == cudaSuccess); | ||||
|     assert(buffer != NULL); | ||||
|     err = cudaFree(buffer); | ||||
|     assert(err == cudaSuccess); | ||||
|   } | ||||
|  | ||||
|   virtual void* scratchpad() const { | ||||
|     if (scratch_ == NULL) { | ||||
|       scratch_ = allocate(kCudaScratchSize + sizeof(unsigned int)); | ||||
|     } | ||||
|     return scratch_; | ||||
|   } | ||||
|  | ||||
|   virtual unsigned int* semaphore() const { | ||||
|     if (semaphore_ == NULL) { | ||||
|       char* scratch = static_cast<char*>(scratchpad()) + kCudaScratchSize; | ||||
|       semaphore_ = reinterpret_cast<unsigned int*>(scratch); | ||||
|       cudaError_t err = cudaMemsetAsync(semaphore_, 0, sizeof(unsigned int), *stream_); | ||||
|       EIGEN_UNUSED_VARIABLE(err) | ||||
|       assert(err == cudaSuccess); | ||||
|     } | ||||
|     return semaphore_; | ||||
|   } | ||||
|  | ||||
|  private: | ||||
|   const cudaStream_t* stream_; | ||||
|   int device_; | ||||
|   mutable void* scratch_; | ||||
|   mutable unsigned int* semaphore_; | ||||
| }; | ||||
|  | ||||
| struct GpuDevice { | ||||
|   // The StreamInterface is not owned: the caller is | ||||
|   // responsible for its initialization and eventual destruction. | ||||
|   explicit GpuDevice(const StreamInterface* stream) : stream_(stream), max_blocks_(INT_MAX) { | ||||
|     eigen_assert(stream); | ||||
|   } | ||||
|   explicit GpuDevice(const StreamInterface* stream, int num_blocks) : stream_(stream), max_blocks_(num_blocks) { | ||||
|     eigen_assert(stream); | ||||
|   } | ||||
|   // TODO(bsteiner): This is an internal API, we should not expose it. | ||||
|   EIGEN_STRONG_INLINE const cudaStream_t& stream() const { | ||||
|     return stream_->stream(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { | ||||
|     return stream_->allocate(num_bytes); | ||||
|   } | ||||
|  | ||||
|   EIGEN_STRONG_INLINE void deallocate(void* buffer) const { | ||||
|     stream_->deallocate(buffer); | ||||
|   } | ||||
|  | ||||
|   EIGEN_STRONG_INLINE void* scratchpad() const { | ||||
|     return stream_->scratchpad(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_STRONG_INLINE unsigned int* semaphore() const { | ||||
|     return stream_->semaphore(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { | ||||
| #ifndef __CUDA_ARCH__ | ||||
|     cudaError_t err = cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToDevice, | ||||
|                                       stream_->stream()); | ||||
|     EIGEN_UNUSED_VARIABLE(err) | ||||
|     assert(err == cudaSuccess); | ||||
| #else | ||||
|   eigen_assert(false && "The default device should be used instead to generate kernel code"); | ||||
| #endif | ||||
|   } | ||||
|  | ||||
|   EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { | ||||
|     cudaError_t err = | ||||
|         cudaMemcpyAsync(dst, src, n, cudaMemcpyHostToDevice, stream_->stream()); | ||||
|     EIGEN_UNUSED_VARIABLE(err) | ||||
|     assert(err == cudaSuccess); | ||||
|   } | ||||
|  | ||||
|   EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { | ||||
|     cudaError_t err = | ||||
|         cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToHost, stream_->stream()); | ||||
|     EIGEN_UNUSED_VARIABLE(err) | ||||
|     assert(err == cudaSuccess); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { | ||||
| #ifndef __CUDA_ARCH__ | ||||
|     cudaError_t err = cudaMemsetAsync(buffer, c, n, stream_->stream()); | ||||
|     EIGEN_UNUSED_VARIABLE(err) | ||||
|     assert(err == cudaSuccess); | ||||
| #else | ||||
|   eigen_assert(false && "The default device should be used instead to generate kernel code"); | ||||
| #endif | ||||
|   } | ||||
|  | ||||
|   EIGEN_STRONG_INLINE size_t numThreads() const { | ||||
|     // FIXME | ||||
|     return 32; | ||||
|   } | ||||
|  | ||||
|   EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { | ||||
|     // FIXME | ||||
|     return 48*1024; | ||||
|   } | ||||
|  | ||||
|   EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { | ||||
|     // We won't try to take advantage of the L2 cache for the time being, and | ||||
|     // there is no L3 cache on CUDA devices. | ||||
|     return firstLevelCacheSize(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void synchronize() const { | ||||
| #if defined(__CUDACC__) && !defined(__CUDA_ARCH__) | ||||
|     cudaError_t err = cudaStreamSynchronize(stream_->stream()); | ||||
|     if (err != cudaSuccess) { | ||||
|       std::cerr << "Error detected in CUDA stream: " | ||||
|                 << cudaGetErrorString(err) | ||||
|                 << std::endl; | ||||
|       assert(err == cudaSuccess); | ||||
|     } | ||||
| #else | ||||
|     assert(false && "The default device should be used instead to generate kernel code"); | ||||
| #endif | ||||
|   } | ||||
|  | ||||
|   EIGEN_STRONG_INLINE int getNumCudaMultiProcessors() const { | ||||
|     return stream_->deviceProperties().multiProcessorCount; | ||||
|   } | ||||
|   EIGEN_STRONG_INLINE int maxCudaThreadsPerBlock() const { | ||||
|     return stream_->deviceProperties().maxThreadsPerBlock; | ||||
|   } | ||||
|   EIGEN_STRONG_INLINE int maxCudaThreadsPerMultiProcessor() const { | ||||
|     return stream_->deviceProperties().maxThreadsPerMultiProcessor; | ||||
|   } | ||||
|   EIGEN_STRONG_INLINE int sharedMemPerBlock() const { | ||||
|     return stream_->deviceProperties().sharedMemPerBlock; | ||||
|   } | ||||
|   EIGEN_STRONG_INLINE int majorDeviceVersion() const { | ||||
|     return stream_->deviceProperties().major; | ||||
|   } | ||||
|   EIGEN_STRONG_INLINE int minorDeviceVersion() const { | ||||
|     return stream_->deviceProperties().minor; | ||||
|   } | ||||
|  | ||||
|   EIGEN_STRONG_INLINE int maxBlocks() const { | ||||
|     return max_blocks_; | ||||
|   } | ||||
|  | ||||
|   // This function checks if the CUDA runtime recorded an error for the | ||||
|   // underlying stream device. | ||||
|   inline bool ok() const { | ||||
| #ifdef __CUDACC__ | ||||
|     cudaError_t error = cudaStreamQuery(stream_->stream()); | ||||
|     return (error == cudaSuccess) || (error == cudaErrorNotReady); | ||||
| #else | ||||
|     return false; | ||||
| #endif | ||||
|   } | ||||
|  | ||||
|  private: | ||||
|   const StreamInterface* stream_; | ||||
|   int max_blocks_; | ||||
| }; | ||||
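| // Usage sketch: wrap an existing CUDA stream in a CudaStreamDevice, hand it to | ||||
| // a GpuDevice, and evaluate tensor expressions on it (A, B, C below stand for | ||||
| // TensorMaps over device memory and are purely illustrative): | ||||
| // | ||||
| //   cudaStream_t stream; | ||||
| //   cudaStreamCreate(&stream); | ||||
| //   Eigen::CudaStreamDevice stream_device(&stream); | ||||
| //   Eigen::GpuDevice gpu_device(&stream_device); | ||||
| //   C.device(gpu_device) = A + B; | ||||
| //   gpu_device.synchronize(); | ||||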
|  | ||||
| #define LAUNCH_CUDA_KERNEL(kernel, gridsize, blocksize, sharedmem, device, ...)             \ | ||||
|   (kernel) <<< (gridsize), (blocksize), (sharedmem), (device).stream() >>> (__VA_ARGS__);   \ | ||||
|   assert(cudaGetLastError() == cudaSuccess); | ||||
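| // Illustrative use of the macro above (the kernel name and arguments are | ||||
| // hypothetical): | ||||
| //   LAUNCH_CUDA_KERNEL(my_kernel, /*gridsize=*/32, /*blocksize=*/256, | ||||
| //                      /*sharedmem=*/0, gpu_device, arg0, arg1); | ||||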
|  | ||||
|  | ||||
| // FIXME: Should be device and kernel specific. | ||||
| #ifdef __CUDACC__ | ||||
| static EIGEN_DEVICE_FUNC inline void setCudaSharedMemConfig(cudaSharedMemConfig config) { | ||||
| #ifndef __CUDA_ARCH__ | ||||
|   cudaError_t status = cudaDeviceSetSharedMemConfig(config); | ||||
|   EIGEN_UNUSED_VARIABLE(status) | ||||
|   assert(status == cudaSuccess); | ||||
| #else | ||||
|   EIGEN_UNUSED_VARIABLE(config) | ||||
| #endif | ||||
| } | ||||
| #endif | ||||
|  | ||||
| }  // end namespace Eigen | ||||
|  | ||||
| #endif  // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H | ||||
							
								
								
									
81 external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h vendored Normal file
							| @@ -0,0 +1,81 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H | ||||
|  | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| // Default device for the machine (typically a single cpu core) | ||||
| struct DefaultDevice { | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { | ||||
|     return internal::aligned_malloc(num_bytes); | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const { | ||||
|     internal::aligned_free(buffer); | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { | ||||
|     ::memcpy(dst, src, n); | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { | ||||
|     memcpy(dst, src, n); | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { | ||||
|     memcpy(dst, src, n); | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { | ||||
|     ::memset(buffer, c, n); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const { | ||||
| #ifndef __CUDA_ARCH__ | ||||
|     // Running on the host CPU | ||||
|     return 1; | ||||
| #else | ||||
|     // Running on a CUDA device | ||||
|     return 32; | ||||
| #endif | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { | ||||
| #ifndef __CUDA_ARCH__ | ||||
|     // Running on the host CPU | ||||
|     return l1CacheSize(); | ||||
| #else | ||||
|     // Running on a CUDA device, return the amount of shared memory available. | ||||
|     return 48*1024; | ||||
| #endif | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { | ||||
| #ifndef __CUDA_ARCH__ | ||||
|     // Running single threaded on the host CPU | ||||
|     return l3CacheSize(); | ||||
| #else | ||||
|     // Running on a CUDA device | ||||
|     return firstLevelCacheSize(); | ||||
| #endif | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { | ||||
| #ifndef __CUDA_ARCH__ | ||||
|     // Running single threaded on the host CPU | ||||
|     // Should return an enum that encodes the ISA supported by the CPU | ||||
|     return 1; | ||||
| #else | ||||
|     // Running on a CUDA device | ||||
|     return __CUDA_ARCH__ / 100; | ||||
| #endif | ||||
|   } | ||||
| }; | ||||
|  | ||||
| }  // namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H | ||||
							
								
								
									
122 external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h vendored Normal file
							| @@ -0,0 +1,122 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Mehdi Goli    Codeplay Software Ltd. | ||||
| // Ralph Potter  Codeplay Software Ltd. | ||||
| // Luke Iwanski  Codeplay Software Ltd. | ||||
| // Contact: <eigen@codeplay.com> | ||||
| // Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
|  | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #if defined(EIGEN_USE_SYCL) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H) | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H | ||||
|  | ||||
| namespace Eigen { | ||||
| struct SyclDevice { | ||||
|   /// class members | ||||
|   /// sycl queue | ||||
|   mutable cl::sycl::queue m_queue; | ||||
|   /// std::map is the container used to make sure that we create only one buffer | ||||
|   /// per pointer. The lifespan of the buffer now depends on the lifespan of SyclDevice. | ||||
|   /// If a non-read-only pointer needs to be accessed on the host, it must be deallocated manually. | ||||
|   mutable std::map<const void *, std::shared_ptr<void>> buffer_map; | ||||
|   /// Create the device using the given device selector. | ||||
|   template<typename dev_Selector> SyclDevice(dev_Selector s) | ||||
|   : | ||||
| #ifdef EIGEN_EXCEPTIONS | ||||
|   m_queue(cl::sycl::queue(s, [=](cl::sycl::exception_list l) { | ||||
|     for (const auto& e : l) { | ||||
|       try { | ||||
|         std::rethrow_exception(e); | ||||
|       } catch (cl::sycl::exception e) { | ||||
|           std::cout << e.what() << std::endl; | ||||
|         } | ||||
|     } | ||||
|   })) | ||||
| #else | ||||
|   m_queue(cl::sycl::queue(s)) | ||||
| #endif | ||||
|   {} | ||||
|   // destructor | ||||
|   ~SyclDevice() { deallocate_all(); } | ||||
|  | ||||
|   template <typename T> void deallocate(T *p) const { | ||||
|     auto it = buffer_map.find(p); | ||||
|     if (it != buffer_map.end()) { | ||||
|       buffer_map.erase(it); | ||||
|       internal::aligned_free(p); | ||||
|     } | ||||
|   } | ||||
|   void deallocate_all() const { | ||||
|     std::map<const void *, std::shared_ptr<void>>::iterator it=buffer_map.begin(); | ||||
|     while (it!=buffer_map.end()) { | ||||
|       auto p=it->first; | ||||
|       buffer_map.erase(it); | ||||
|       internal::aligned_free(const_cast<void*>(p)); | ||||
|       it=buffer_map.begin(); | ||||
|     } | ||||
|     buffer_map.clear(); | ||||
|   } | ||||
|  | ||||
|   /// Creates a SYCL accessor for a buffer. This function first tries to find | ||||
|   /// the buffer in the buffer_map. If found, it gets the accessor from it; if | ||||
|   /// not, it adds an entry by creating a SYCL buffer for that particular pointer. | ||||
|   template <cl::sycl::access::mode AcMd, typename T> inline cl::sycl::accessor<T, 1, AcMd, cl::sycl::access::target::global_buffer> | ||||
|   get_sycl_accessor(size_t num_bytes, cl::sycl::handler &cgh, const T * ptr) const { | ||||
|     return (get_sycl_buffer<T>(num_bytes, ptr)->template get_access<AcMd, cl::sycl::access::target::global_buffer>(cgh)); | ||||
|   } | ||||
|  | ||||
|   template<typename T> inline  std::pair<std::map<const void *, std::shared_ptr<void>>::iterator,bool> add_sycl_buffer(const T *ptr, size_t num_bytes) const { | ||||
|     using Type = cl::sycl::buffer<T, 1>; | ||||
|     std::pair<std::map<const void *, std::shared_ptr<void>>::iterator,bool> ret = buffer_map.insert(std::pair<const void *, std::shared_ptr<void>>(ptr, std::shared_ptr<void>(new Type(cl::sycl::range<1>(num_bytes)), | ||||
|       [](void *dataMem) { delete static_cast<Type*>(dataMem); }))); | ||||
|     (static_cast<Type*>(buffer_map.at(ptr).get()))->set_final_data(nullptr); | ||||
|     return ret; | ||||
|   } | ||||
|  | ||||
|   template <typename T> inline cl::sycl::buffer<T, 1>* get_sycl_buffer(size_t num_bytes,const T * ptr) const { | ||||
|     return static_cast<cl::sycl::buffer<T, 1>*>(add_sycl_buffer(ptr, num_bytes).first->second.get()); | ||||
|   } | ||||
|  | ||||
|   /// allocating memory on the cpu | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void *allocate(size_t) const { | ||||
|     return internal::aligned_malloc(8); | ||||
|   } | ||||
|  | ||||
|   // Runtime checks that determine whether the device is suitable could be added here. | ||||
|   bool isDeviceSuitable() const { return true; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void *dst, const void *src, size_t n) const { | ||||
|     ::memcpy(dst, src, n); | ||||
|   } | ||||
|  | ||||
|   template<typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(T *dst, const T *src, size_t n) const { | ||||
|     auto host_acc= (static_cast<cl::sycl::buffer<T, 1>*>(add_sycl_buffer(dst, n).first->second.get()))-> template get_access<cl::sycl::access::mode::discard_write, cl::sycl::access::target::host_buffer>(); | ||||
|     memcpy(host_acc.get_pointer(), src, n); | ||||
|   } | ||||
|   /// With the current implementation of SYCL, the data is copied twice from device to host. This will be fixed soon. | ||||
|   template<typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(T *dst, const T *src, size_t n) const { | ||||
|     auto it = buffer_map.find(src); | ||||
|     if (it != buffer_map.end()) { | ||||
|       auto host_acc= (static_cast<cl::sycl::buffer<T, 1>*>(it->second.get()))-> template get_access<cl::sycl::access::mode::read, cl::sycl::access::target::host_buffer>(); | ||||
|       memcpy(dst,host_acc.get_pointer(),  n); | ||||
|     } else{ | ||||
|       eigen_assert("no device memory found. The memory might be destroyed before creation"); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void *buffer, int c, size_t n) const { | ||||
|     ::memset(buffer, c, n); | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { | ||||
|   return 1; | ||||
|   } | ||||
| }; | ||||
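| // Usage sketch, assuming a SYCL-enabled build (the selector choice and the | ||||
| // tensors A, B, C are illustrative): | ||||
| // | ||||
| //   cl::sycl::gpu_selector selector; | ||||
| //   Eigen::SyclDevice sycl_device(selector); | ||||
| //   C.device(sycl_device) = A + B; | ||||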
|  | ||||
| }  // end namespace Eigen | ||||
|  | ||||
| #endif  // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H | ||||
							
								
								
									
282 external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h vendored Normal file
							| @@ -0,0 +1,282 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #if defined(EIGEN_USE_THREADS) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H) | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| // Use the NonBlockingThreadPool by default. Define EIGEN_USE_SIMPLE_THREAD_POOL | ||||
| // to fall back to the older SimpleThreadPool implementation. | ||||
| #ifndef EIGEN_USE_SIMPLE_THREAD_POOL | ||||
| template <typename Env> using ThreadPoolTempl = NonBlockingThreadPoolTempl<Env>; | ||||
| typedef NonBlockingThreadPool ThreadPool; | ||||
| #else | ||||
| template <typename Env> using ThreadPoolTempl = SimpleThreadPoolTempl<Env>; | ||||
| typedef SimpleThreadPool ThreadPool; | ||||
| #endif | ||||
|  | ||||
|  | ||||
| // Barrier is an object that allows one or more threads to wait until | ||||
| // Notify has been called a specified number of times. | ||||
| class Barrier { | ||||
|  public: | ||||
|   Barrier(unsigned int count) : state_(count << 1), notified_(false) { | ||||
|     eigen_assert(((count << 1) >> 1) == count); | ||||
|   } | ||||
|   ~Barrier() { | ||||
|     eigen_assert((state_>>1) == 0); | ||||
|   } | ||||
|  | ||||
|   void Notify() { | ||||
|     unsigned int v = state_.fetch_sub(2, std::memory_order_acq_rel) - 2; | ||||
|     if (v != 1) { | ||||
|       eigen_assert(((v + 2) & ~1) != 0); | ||||
|       return;  // either count has not dropped to 0, or waiter is not waiting | ||||
|     } | ||||
|     std::unique_lock<std::mutex> l(mu_); | ||||
|     eigen_assert(!notified_); | ||||
|     notified_ = true; | ||||
|     cv_.notify_all(); | ||||
|   } | ||||
|  | ||||
|   void Wait() { | ||||
|     unsigned int v = state_.fetch_or(1, std::memory_order_acq_rel); | ||||
|     if ((v >> 1) == 0) return; | ||||
|     std::unique_lock<std::mutex> l(mu_); | ||||
|     while (!notified_) { | ||||
|       cv_.wait(l); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|  private: | ||||
|   std::mutex mu_; | ||||
|   std::condition_variable cv_; | ||||
|   std::atomic<unsigned int> state_;  // low bit is waiter flag | ||||
|   bool notified_; | ||||
| }; | ||||
|  | ||||
|  | ||||
| // Notification is an object that allows a user to wait for another | ||||
| // thread to signal a notification that an event has occurred. | ||||
| // | ||||
| // Multiple threads can wait on the same Notification object, | ||||
| // but only one caller must call Notify() on the object. | ||||
| struct Notification : Barrier { | ||||
|   Notification() : Barrier(1) {}; | ||||
| }; | ||||
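| // Sketch of how Barrier pairs with the thread pool device defined below: | ||||
| // schedule a batch of closures with enqueue_with_barrier() and block until | ||||
| // every one of them has called Notify() (num_tasks and the lambda body are | ||||
| // illustrative): | ||||
| // | ||||
| //   Eigen::Barrier barrier(static_cast<unsigned int>(num_tasks)); | ||||
| //   for (int i = 0; i < num_tasks; ++i) { | ||||
| //     device.enqueue_with_barrier(&barrier, [i]() { /* work for task i */ }); | ||||
| //   } | ||||
| //   barrier.Wait();  // returns once Notify() has run num_tasks times | ||||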
|  | ||||
|  | ||||
| // Runs an arbitrary function and then calls Notify() on the passed in | ||||
| // Notification. | ||||
| template <typename Function, typename... Args> struct FunctionWrapperWithNotification | ||||
| { | ||||
|   static void run(Notification* n, Function f, Args... args) { | ||||
|     f(args...); | ||||
|     if (n) { | ||||
|       n->Notify(); | ||||
|     } | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename Function, typename... Args> struct FunctionWrapperWithBarrier | ||||
| { | ||||
|   static void run(Barrier* b, Function f, Args... args) { | ||||
|     f(args...); | ||||
|     if (b) { | ||||
|       b->Notify(); | ||||
|     } | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename SyncType> | ||||
| static EIGEN_STRONG_INLINE void wait_until_ready(SyncType* n) { | ||||
|   if (n) { | ||||
|     n->Wait(); | ||||
|   } | ||||
| } | ||||
|  | ||||
|  | ||||
| // Build a thread pool device on top of an existing pool of threads. | ||||
| struct ThreadPoolDevice { | ||||
|   // The ownership of the thread pool remains with the caller. | ||||
|   ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores) : pool_(pool), num_threads_(num_cores) { } | ||||
|  | ||||
|   EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { | ||||
|     return internal::aligned_malloc(num_bytes); | ||||
|   } | ||||
|  | ||||
|   EIGEN_STRONG_INLINE void deallocate(void* buffer) const { | ||||
|     internal::aligned_free(buffer); | ||||
|   } | ||||
|  | ||||
|   EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { | ||||
|     ::memcpy(dst, src, n); | ||||
|   } | ||||
|   EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { | ||||
|     memcpy(dst, src, n); | ||||
|   } | ||||
|   EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { | ||||
|     memcpy(dst, src, n); | ||||
|   } | ||||
|  | ||||
|   EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { | ||||
|     ::memset(buffer, c, n); | ||||
|   } | ||||
|  | ||||
|   EIGEN_STRONG_INLINE int numThreads() const { | ||||
|     return num_threads_; | ||||
|   } | ||||
|  | ||||
|   EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { | ||||
|     return l1CacheSize(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { | ||||
|     // The l3 cache size is shared between all the cores. | ||||
|     return l3CacheSize() / num_threads_; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { | ||||
|     // Should return an enum that encodes the ISA supported by the CPU | ||||
|     return 1; | ||||
|   } | ||||
|  | ||||
|   template <class Function, class... Args> | ||||
|   EIGEN_STRONG_INLINE Notification* enqueue(Function&& f, Args&&... args) const { | ||||
|     Notification* n = new Notification(); | ||||
|     pool_->Schedule(std::bind(&FunctionWrapperWithNotification<Function, Args...>::run, n, f, args...)); | ||||
|     return n; | ||||
|   } | ||||
|  | ||||
|   template <class Function, class... Args> | ||||
|   EIGEN_STRONG_INLINE void enqueue_with_barrier(Barrier* b, | ||||
|                                                 Function&& f, | ||||
|                                                 Args&&... args) const { | ||||
|     pool_->Schedule(std::bind( | ||||
|         &FunctionWrapperWithBarrier<Function, Args...>::run, b, f, args...)); | ||||
|   } | ||||
|  | ||||
|   template <class Function, class... Args> | ||||
|   EIGEN_STRONG_INLINE void enqueueNoNotification(Function&& f, Args&&... args) const { | ||||
|     pool_->Schedule(std::bind(f, args...)); | ||||
|   } | ||||
|  | ||||
|   // Returns a logical thread index between 0 and pool_->NumThreads() - 1 if | ||||
|   // called from one of the threads in pool_. Returns -1 otherwise. | ||||
|   EIGEN_STRONG_INLINE int currentThreadId() const { | ||||
|     return pool_->CurrentThreadId(); | ||||
|   } | ||||
|  | ||||
|   // parallelFor executes f over the index range [0, n) in parallel and waits | ||||
|   // for completion. f accepts a half-open interval [first, last). | ||||
|   // The block size is chosen based on the iteration cost and the resulting | ||||
|   // parallel efficiency. If block_align is not nullptr, it is called to round | ||||
|   // up the block size. | ||||
|   void parallelFor(Index n, const TensorOpCost& cost, | ||||
|                    std::function<Index(Index)> block_align, | ||||
|                    std::function<void(Index, Index)> f) const { | ||||
|     typedef TensorCostModel<ThreadPoolDevice> CostModel; | ||||
|     if (n <= 1 || numThreads() == 1 || | ||||
|         CostModel::numThreads(n, cost, static_cast<int>(numThreads())) == 1) { | ||||
|       f(0, n); | ||||
|       return; | ||||
|     } | ||||
|  | ||||
|     // Calculate the block size based on (1) the iteration cost and (2) the | ||||
|     // parallel efficiency. We want blocks that are not too small (to mitigate | ||||
|     // parallelization overheads) and not too large (to mitigate the tail | ||||
|     // effect and potential load imbalance), and we also want the number of | ||||
|     // blocks to divide evenly across the threads. | ||||
|  | ||||
|     double block_size_f = 1.0 / CostModel::taskSize(1, cost); | ||||
|     const Index max_oversharding_factor = 4; | ||||
|     Index block_size = numext::mini( | ||||
|         n, numext::maxi<Index>(divup<Index>(n, max_oversharding_factor * numThreads()), | ||||
|                                block_size_f)); | ||||
|     const Index max_block_size = numext::mini(n, 2 * block_size); | ||||
|     if (block_align) { | ||||
|       Index new_block_size = block_align(block_size); | ||||
|       eigen_assert(new_block_size >= block_size); | ||||
|       block_size = numext::mini(n, new_block_size); | ||||
|     } | ||||
|     Index block_count = divup(n, block_size); | ||||
|     // Calculate parallel efficiency as fraction of total CPU time used for | ||||
|     // computations: | ||||
|     double max_efficiency = | ||||
|         static_cast<double>(block_count) / | ||||
|         (divup<int>(block_count, numThreads()) * numThreads()); | ||||
|     // Now try to increase block size up to max_block_size as long as it | ||||
|     // doesn't decrease parallel efficiency. | ||||
|     for (Index prev_block_count = block_count; | ||||
|          max_efficiency < 1.0 && prev_block_count > 1;) { | ||||
|       // This is the next block size that divides size into a smaller number | ||||
|       // of blocks than the current block_size. | ||||
|       Index coarser_block_size = divup(n, prev_block_count - 1); | ||||
|       if (block_align) { | ||||
|         Index new_block_size = block_align(coarser_block_size); | ||||
|         eigen_assert(new_block_size >= coarser_block_size); | ||||
|         coarser_block_size = numext::mini(n, new_block_size); | ||||
|       } | ||||
|       if (coarser_block_size > max_block_size) { | ||||
|         break;  // Reached max block size. Stop. | ||||
|       } | ||||
|       // Recalculate parallel efficiency. | ||||
|       const Index coarser_block_count = divup(n, coarser_block_size); | ||||
|       eigen_assert(coarser_block_count < prev_block_count); | ||||
|       prev_block_count = coarser_block_count; | ||||
|       const double coarser_efficiency = | ||||
|           static_cast<double>(coarser_block_count) / | ||||
|           (divup<int>(coarser_block_count, numThreads()) * numThreads()); | ||||
|       if (coarser_efficiency + 0.01 >= max_efficiency) { | ||||
|         // Taking it. | ||||
|         block_size = coarser_block_size; | ||||
|         block_count = coarser_block_count; | ||||
|         if (max_efficiency < coarser_efficiency) { | ||||
|           max_efficiency = coarser_efficiency; | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     // Recursively divide size into halves until we reach block_size. | ||||
|     // Division code rounds mid to block_size, so we are guaranteed to get | ||||
|     // block_count leaves that do actual computations. | ||||
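|     // Worked example (editor's illustration): with n = 10 and block_size = 3, | ||||
|     // block_count = divup(10, 3) = 4. handleRange(0, 10) picks mid = 6 and | ||||
|     // recurses into (0, 6) and (6, 10), which split again into (0, 3), (3, 6), | ||||
|     // (6, 9) and (9, 10): exactly block_count leaves, each invoking f once. | ||||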
|     Barrier barrier(static_cast<unsigned int>(block_count)); | ||||
|     std::function<void(Index, Index)> handleRange; | ||||
|     handleRange = [=, &handleRange, &barrier, &f](Index first, Index last) { | ||||
|       if (last - first <= block_size) { | ||||
|         // Single block or less, execute directly. | ||||
|         f(first, last); | ||||
|         barrier.Notify(); | ||||
|         return; | ||||
|       } | ||||
|       // Split into halves and submit to the pool. | ||||
|       Index mid = first + divup((last - first) / 2, block_size) * block_size; | ||||
|       pool_->Schedule([=, &handleRange]() { handleRange(mid, last); }); | ||||
|       pool_->Schedule([=, &handleRange]() { handleRange(first, mid); }); | ||||
|     }; | ||||
|     handleRange(0, n); | ||||
|     barrier.Wait(); | ||||
|   } | ||||
|  | ||||
|   // Convenience wrapper for parallelFor that does not align blocks. | ||||
|   void parallelFor(Index n, const TensorOpCost& cost, | ||||
|                    std::function<void(Index, Index)> f) const { | ||||
|     parallelFor(n, cost, nullptr, std::move(f)); | ||||
|   } | ||||
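|  | ||||
|   // Usage sketch (editor's illustration; `device`, `src` and `dst` are | ||||
|   // assumed to exist and are not part of this file): scale n floats in | ||||
|   // parallel, with a per-element cost of one load, one store and a few | ||||
|   // compute cycles. | ||||
|   // | ||||
|   //   const TensorOpCost cost(/*bytes_loaded=*/sizeof(float), | ||||
|   //                           /*bytes_stored=*/sizeof(float), | ||||
|   //                           /*compute_cycles=*/4); | ||||
|   //   device.parallelFor(n, cost, [&](Index first, Index last) { | ||||
|   //     for (Index i = first; i < last; ++i) dst[i] = 2.0f * src[i]; | ||||
|   //   }); | ||||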
|  | ||||
|  private: | ||||
|   ThreadPoolInterface* pool_; | ||||
|   int num_threads_; | ||||
| }; | ||||
|  | ||||
|  | ||||
| }  // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H | ||||
							
								
								
									
236  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h  (vendored, Normal file)
							| @@ -0,0 +1,236 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \internal | ||||
|   * | ||||
|   * \class TensorDimensionList | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Special case of tensor index list used to list all the dimensions of a tensor of rank n. | ||||
|   * | ||||
|   * \sa Tensor | ||||
|   */ | ||||
|  | ||||
| template <typename Index, std::size_t Rank> struct DimensionList { | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE | ||||
|   const Index operator[] (const Index i) const { return i; } | ||||
| }; | ||||
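|  | ||||
| // Illustration (editor's sketch, not part of the original source): | ||||
| //   DimensionList<DenseIndex, 3> dims; | ||||
| //   dims[0] == 0, dims[1] == 1, dims[2] == 2 | ||||
| // i.e. DimensionList stands for the index list {0, 1, ..., Rank-1}, every | ||||
| // dimension of the tensor, without storing any data. | ||||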
|  | ||||
| namespace internal { | ||||
|  | ||||
| template<typename Index, std::size_t Rank> struct array_size<DimensionList<Index, Rank> > { | ||||
|   static const size_t value = Rank; | ||||
| }; | ||||
| template<typename Index, std::size_t Rank> struct array_size<const DimensionList<Index, Rank> > { | ||||
|   static const size_t value = Rank; | ||||
| }; | ||||
|  | ||||
| template<DenseIndex n, typename Index, std::size_t Rank> const Index array_get(DimensionList<Index, Rank>&) { | ||||
|   return n; | ||||
| } | ||||
| template<DenseIndex n, typename Index, std::size_t Rank> const Index array_get(const DimensionList<Index, Rank>&) { | ||||
|   return n; | ||||
| } | ||||
|  | ||||
|  | ||||
| #if EIGEN_HAS_CONSTEXPR | ||||
| template <typename Index, std::size_t Rank> | ||||
| struct index_known_statically_impl<DimensionList<Index, Rank> > { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex) { | ||||
|     return true; | ||||
|   } | ||||
| }; | ||||
| template <typename Index, std::size_t Rank> | ||||
| struct index_known_statically_impl<const DimensionList<Index, Rank> > { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex) { | ||||
|     return true; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename Index, std::size_t Rank> | ||||
| struct all_indices_known_statically_impl<DimensionList<Index, Rank> > { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run() { | ||||
|     return true; | ||||
|   } | ||||
| }; | ||||
| template <typename Index, std::size_t Rank> | ||||
| struct all_indices_known_statically_impl<const DimensionList<Index, Rank> > { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run() { | ||||
|     return true; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename Index, std::size_t Rank> | ||||
| struct indices_statically_known_to_increase_impl<DimensionList<Index, Rank> > { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run() { | ||||
|     return true; | ||||
|   } | ||||
| }; | ||||
| template <typename Index, std::size_t Rank> | ||||
| struct indices_statically_known_to_increase_impl<const DimensionList<Index, Rank> > { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run() { | ||||
|     return true; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename Index, std::size_t Rank> | ||||
| struct index_statically_eq_impl<DimensionList<Index, Rank> > { | ||||
|   static constexpr bool run(const DenseIndex i, const DenseIndex value) { | ||||
|     return i == value; | ||||
|   } | ||||
| }; | ||||
| template <typename Index, std::size_t Rank> | ||||
| struct index_statically_eq_impl<const DimensionList<Index, Rank> > { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { | ||||
|     return i == value; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename Index, std::size_t Rank> | ||||
| struct index_statically_ne_impl<DimensionList<Index, Rank> > { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { | ||||
|     return i != value; | ||||
|   } | ||||
| }; | ||||
| template <typename Index, std::size_t Rank> | ||||
| struct index_statically_ne_impl<const DimensionList<Index, Rank> > { | ||||
|   static constexpr bool run(const DenseIndex i, const DenseIndex value) { | ||||
|     return i != value; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename Index, std::size_t Rank> | ||||
| struct index_statically_gt_impl<DimensionList<Index, Rank> > { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { | ||||
|     return i > value; | ||||
|   } | ||||
| }; | ||||
| template <typename Index, std::size_t Rank> | ||||
| struct index_statically_gt_impl<const DimensionList<Index, Rank> > { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { | ||||
|     return i > value; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename Index, std::size_t Rank> | ||||
| struct index_statically_lt_impl<DimensionList<Index, Rank> > { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { | ||||
|     return i < value; | ||||
|   } | ||||
| }; | ||||
| template <typename Index, std::size_t Rank> | ||||
| struct index_statically_lt_impl<const DimensionList<Index, Rank> > { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { | ||||
|     return i < value; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| #else | ||||
| template <typename Index, std::size_t Rank> | ||||
| struct index_known_statically_impl<DimensionList<Index, Rank> > { | ||||
|   EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run(const DenseIndex) { | ||||
|     return true; | ||||
|   } | ||||
| }; | ||||
| template <typename Index, std::size_t Rank> | ||||
| struct index_known_statically_impl<const DimensionList<Index, Rank> > { | ||||
|   EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run(const DenseIndex) { | ||||
|     return true; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename Index, std::size_t Rank> | ||||
| struct all_indices_known_statically_impl<DimensionList<Index, Rank> > { | ||||
|   EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run() { | ||||
|     return true; | ||||
|   } | ||||
| }; | ||||
| template <typename Index, std::size_t Rank> | ||||
| struct all_indices_known_statically_impl<const DimensionList<Index, Rank> > { | ||||
|   EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run() { | ||||
|     return true; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename Index, std::size_t Rank> | ||||
| struct indices_statically_known_to_increase_impl<DimensionList<Index, Rank> > { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() { | ||||
|     return true; | ||||
|   } | ||||
| }; | ||||
| template <typename Index, std::size_t Rank> | ||||
| struct indices_statically_known_to_increase_impl<const DimensionList<Index, Rank> > { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() { | ||||
|     return true; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename Index, std::size_t Rank> | ||||
| struct index_statically_eq_impl<DimensionList<Index, Rank> > { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
| template <typename Index, std::size_t Rank> | ||||
| struct index_statically_eq_impl<const DimensionList<Index, Rank> > { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename Index, std::size_t Rank> | ||||
| struct index_statically_ne_impl<DimensionList<Index, Rank> > { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex){ | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
| template <typename Index, std::size_t Rank> | ||||
| struct index_statically_ne_impl<const DimensionList<Index, Rank> > { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename Index, std::size_t Rank> | ||||
| struct index_statically_gt_impl<DimensionList<Index, Rank> > { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
| template <typename Index, std::size_t Rank> | ||||
| struct index_statically_gt_impl<const DimensionList<Index, Rank> > { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename Index, std::size_t Rank> | ||||
| struct index_statically_lt_impl<DimensionList<Index, Rank> > { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
| template <typename Index, std::size_t Rank> | ||||
| struct index_statically_lt_impl<const DimensionList<Index, Rank> > { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
| #endif | ||||
|  | ||||
| }  // end namespace internal | ||||
| }  // end namespace Eigen | ||||
|  | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H | ||||
							
								
								
									
428  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h  (vendored, Normal file)
							| @@ -0,0 +1,428 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_DIMENSIONS_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_DIMENSIONS_H | ||||
|  | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \internal | ||||
|   * | ||||
|   * \class TensorDimensions | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Set of classes used to encode and store the dimensions of a Tensor. | ||||
|   * | ||||
|   * The Sizes class encodes as part of the type the number of dimensions and the | ||||
|   * sizes corresponding to each dimension. It uses no storage space since it is | ||||
|   * entirely known at compile time. | ||||
|   * The DSizes class is its dynamic sibling: the number of dimensions is known | ||||
|   * at compile time but the sizes are set during execution. | ||||
|   * | ||||
|   * \sa Tensor | ||||
|   */ | ||||
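|  | ||||
| // Illustration (editor's sketch, not part of the original source): | ||||
| //   Sizes<2, 3, 4> fixed;                  // extents baked into the type; | ||||
| //                                          // fixed.TotalSize() == 24, no storage | ||||
| //   DSizes<DenseIndex, 3> dyn(2, 3, 4);    // rank fixed at compile time, | ||||
| //                                          // extents chosen at run time | ||||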
|  | ||||
| // Boilerplate code | ||||
| namespace internal { | ||||
|  | ||||
| template<std::size_t n, typename Dimension> struct dget { | ||||
|   static const std::size_t value = get<n, Dimension>::value; | ||||
| }; | ||||
|  | ||||
|  | ||||
| template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor> | ||||
| struct fixed_size_tensor_index_linearization_helper | ||||
| { | ||||
|   template <typename Dimensions> EIGEN_DEVICE_FUNC | ||||
|   static inline Index run(array<Index, NumIndices> const& indices, | ||||
|                           const Dimensions& dimensions) | ||||
|   { | ||||
|     return array_get<RowMajor ? n - 1 : (NumIndices - n)>(indices) + | ||||
|         dget<RowMajor ? n - 1 : (NumIndices - n), Dimensions>::value * | ||||
|         fixed_size_tensor_index_linearization_helper<Index, NumIndices, n - 1, RowMajor>::run(indices, dimensions); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<typename Index, std::size_t NumIndices, bool RowMajor> | ||||
| struct fixed_size_tensor_index_linearization_helper<Index, NumIndices, 0, RowMajor> | ||||
| { | ||||
|   template <typename Dimensions> EIGEN_DEVICE_FUNC | ||||
|   static inline Index run(array<Index, NumIndices> const&, const Dimensions&) | ||||
|   { | ||||
|     return 0; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<typename Index, std::size_t n> | ||||
| struct fixed_size_tensor_index_extraction_helper | ||||
| { | ||||
|   template <typename Dimensions> EIGEN_DEVICE_FUNC | ||||
|   static inline Index run(const Index index, | ||||
|                           const Dimensions& dimensions) | ||||
|   { | ||||
|     const Index mult = (index == n-1) ? 1 : 0; | ||||
|     return array_get<n-1>(dimensions) * mult + | ||||
|         fixed_size_tensor_index_extraction_helper<Index, n - 1>::run(index, dimensions); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<typename Index> | ||||
| struct fixed_size_tensor_index_extraction_helper<Index, 0> | ||||
| { | ||||
|   template <typename Dimensions> EIGEN_DEVICE_FUNC | ||||
|   static inline Index run(const Index, | ||||
|                           const Dimensions&) | ||||
|   { | ||||
|     return 0; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
|  | ||||
| // Fixed size | ||||
| #ifndef EIGEN_EMULATE_CXX11_META_H | ||||
| template <typename std::ptrdiff_t... Indices> | ||||
| struct Sizes : internal::numeric_list<std::ptrdiff_t, Indices...> { | ||||
|   typedef internal::numeric_list<std::ptrdiff_t, Indices...> Base; | ||||
|   static const std::ptrdiff_t total_size = internal::arg_prod(Indices...); | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t rank() const { | ||||
|     return Base::count; | ||||
|   } | ||||
|  | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t TotalSize() { | ||||
|     return internal::arg_prod(Indices...); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC Sizes() { } | ||||
|   template <typename DenseIndex> | ||||
|   explicit EIGEN_DEVICE_FUNC Sizes(const array<DenseIndex, Base::count>& /*indices*/) { | ||||
|     // todo: add assertion | ||||
|   } | ||||
| #if EIGEN_HAS_VARIADIC_TEMPLATES | ||||
|   template <typename... DenseIndex> EIGEN_DEVICE_FUNC Sizes(DenseIndex...) { } | ||||
|   explicit EIGEN_DEVICE_FUNC Sizes(std::initializer_list<std::ptrdiff_t> /*l*/) { | ||||
|     // todo: add assertion | ||||
|   } | ||||
| #endif | ||||
|  | ||||
|   template <typename T> Sizes& operator = (const T& /*other*/) { | ||||
|     // add assertion failure if the size of other is different | ||||
|     return *this; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t operator[] (const std::size_t index) const { | ||||
|     return internal::fixed_size_tensor_index_extraction_helper<std::ptrdiff_t, Base::count>::run(index, *this); | ||||
|   } | ||||
|  | ||||
|   template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   size_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const { | ||||
|     return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, false>::run(indices, *static_cast<const Base*>(this)); | ||||
|   } | ||||
|   template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   size_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const { | ||||
|     return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, true>::run(indices, *static_cast<const Base*>(this)); | ||||
|   } | ||||
| }; | ||||
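|  | ||||
| // Worked example (editor's illustration): for Sizes<2, 3, 4> and indices | ||||
| // {1, 0, 2}, | ||||
| //   IndexOfColMajor({1, 0, 2}) == 1 + 2*(0 + 3*2) == 13 | ||||
| //   IndexOfRowMajor({1, 0, 2}) == 2 + 4*(0 + 3*1) == 14 | ||||
| // i.e. the stride-1 dimension is the first one in column-major order and | ||||
| // the last one in row-major order. | ||||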
|  | ||||
| namespace internal { | ||||
| template <typename std::ptrdiff_t... Indices> | ||||
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_prod(const Sizes<Indices...>&) { | ||||
|   return Sizes<Indices...>::total_size; | ||||
| } | ||||
| } | ||||
|  | ||||
| #else | ||||
|  | ||||
| template <std::size_t n> | ||||
| struct non_zero_size { | ||||
|   typedef internal::type2val<std::size_t, n> type; | ||||
| }; | ||||
| template <> | ||||
| struct non_zero_size<0> { | ||||
|   typedef internal::null_type type; | ||||
| }; | ||||
|  | ||||
| template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0, std::size_t V5=0> struct Sizes { | ||||
|   typedef typename internal::make_type_list<typename non_zero_size<V1>::type, typename non_zero_size<V2>::type, typename non_zero_size<V3>::type, typename non_zero_size<V4>::type, typename non_zero_size<V5>::type >::type Base; | ||||
|   static const size_t count = Base::count; | ||||
|   static const std::size_t total_size = internal::arg_prod<Base>::value; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t rank() const { | ||||
|     return count; | ||||
|   } | ||||
|  | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t TotalSize() { | ||||
|     return internal::arg_prod<Base>::value; | ||||
|   } | ||||
|  | ||||
|   Sizes() { } | ||||
|   template <typename DenseIndex> | ||||
|   explicit Sizes(const array<DenseIndex, Base::count>& /*indices*/) { | ||||
|     // todo: add assertion | ||||
|   } | ||||
|   template <typename T> Sizes& operator = (const T& /*other*/) { | ||||
|     // add assertion failure if the size of other is different | ||||
|     return *this; | ||||
|   } | ||||
|  | ||||
| #if EIGEN_HAS_VARIADIC_TEMPLATES | ||||
|   template <typename... DenseIndex> Sizes(DenseIndex... /*indices*/) { } | ||||
|   explicit Sizes(std::initializer_list<std::size_t>) { | ||||
|     // todo: add assertion | ||||
|   } | ||||
| #else | ||||
|   EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex) { | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex) { | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex, const DenseIndex) { | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex) { | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex) { | ||||
|   } | ||||
| #endif | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index operator[] (const Index index) const { | ||||
|     switch (index) { | ||||
|       case 0: | ||||
|         return internal::get<0, Base>::value; | ||||
|       case 1: | ||||
|         return internal::get<1, Base>::value; | ||||
|       case 2: | ||||
|         return internal::get<2, Base>::value; | ||||
|       case 3: | ||||
|         return internal::get<3, Base>::value; | ||||
|       case 4: | ||||
|         return internal::get<4, Base>::value; | ||||
|       default: | ||||
|         eigen_assert(false && "index overflow"); | ||||
|         return static_cast<Index>(-1); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   size_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const { | ||||
|     return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, false>::run(indices, *reinterpret_cast<const Base*>(this)); | ||||
|   } | ||||
|   template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   size_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const { | ||||
|     return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, true>::run(indices, *reinterpret_cast<const Base*>(this)); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| namespace internal { | ||||
| template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> | ||||
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_prod(const Sizes<V1, V2, V3, V4, V5>&) { | ||||
|   return Sizes<V1, V2, V3, V4, V5>::total_size; | ||||
| } | ||||
| } | ||||
|  | ||||
| #endif | ||||
|  | ||||
| // Boilerplate | ||||
| namespace internal { | ||||
| template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor> | ||||
| struct tensor_index_linearization_helper | ||||
| { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   Index run(array<Index, NumIndices> const& indices, array<Index, NumIndices> const& dimensions) | ||||
|   { | ||||
|     return array_get<RowMajor ? n : (NumIndices - n - 1)>(indices) + | ||||
|       array_get<RowMajor ? n : (NumIndices - n - 1)>(dimensions) * | ||||
|         tensor_index_linearization_helper<Index, NumIndices, n - 1, RowMajor>::run(indices, dimensions); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<typename Index, std::size_t NumIndices, bool RowMajor> | ||||
| struct tensor_index_linearization_helper<Index, NumIndices, 0, RowMajor> | ||||
| { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   Index run(array<Index, NumIndices> const& indices, array<Index, NumIndices> const&) | ||||
|   { | ||||
|     return array_get<RowMajor ? 0 : NumIndices - 1>(indices); | ||||
|   } | ||||
| }; | ||||
| }  // end namespace internal | ||||
|  | ||||
|  | ||||
|  | ||||
| // Dynamic size | ||||
| template <typename DenseIndex, int NumDims> | ||||
| struct DSizes : array<DenseIndex, NumDims> { | ||||
|   typedef array<DenseIndex, NumDims> Base; | ||||
|   static const int count = NumDims; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t rank() const { | ||||
|     return NumDims; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex TotalSize() const { | ||||
|     return (NumDims == 0) ? 1 : internal::array_prod(*static_cast<const Base*>(this)); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DSizes() { | ||||
|     for (int i = 0 ; i < NumDims; ++i) { | ||||
|       (*this)[i] = 0; | ||||
|     } | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC explicit DSizes(const array<DenseIndex, NumDims>& a) : Base(a) { } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC explicit DSizes(const DenseIndex i0) { | ||||
|     eigen_assert(NumDims == 1); | ||||
|     (*this)[0] = i0; | ||||
|   } | ||||
|  | ||||
| #if EIGEN_HAS_VARIADIC_TEMPLATES | ||||
|   template<typename... IndexTypes> EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE explicit DSizes(DenseIndex firstDimension, DenseIndex secondDimension, IndexTypes... otherDimensions) : Base({{firstDimension, secondDimension, otherDimensions...}}) { | ||||
|     EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 2 == NumDims, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|   } | ||||
| #else | ||||
|   EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1) { | ||||
|     eigen_assert(NumDims == 2); | ||||
|     (*this)[0] = i0; | ||||
|     (*this)[1] = i1; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2) { | ||||
|     eigen_assert(NumDims == 3); | ||||
|     (*this)[0] = i0; | ||||
|     (*this)[1] = i1; | ||||
|     (*this)[2] = i2; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3) { | ||||
|     eigen_assert(NumDims == 4); | ||||
|     (*this)[0] = i0; | ||||
|     (*this)[1] = i1; | ||||
|     (*this)[2] = i2; | ||||
|     (*this)[3] = i3; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3, const DenseIndex i4) { | ||||
|     eigen_assert(NumDims == 5); | ||||
|     (*this)[0] = i0; | ||||
|     (*this)[1] = i1; | ||||
|     (*this)[2] = i2; | ||||
|     (*this)[3] = i3; | ||||
|     (*this)[4] = i4; | ||||
|   } | ||||
| #endif | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC DSizes& operator = (const array<DenseIndex, NumDims>& other) { | ||||
|     *static_cast<Base*>(this) = other; | ||||
|     return *this; | ||||
|   } | ||||
|  | ||||
|   // A constexpr would be so much better here | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex IndexOfColMajor(const array<DenseIndex, NumDims>& indices) const { | ||||
|     return internal::tensor_index_linearization_helper<DenseIndex, NumDims, NumDims - 1, false>::run(indices, *static_cast<const Base*>(this)); | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex IndexOfRowMajor(const array<DenseIndex, NumDims>& indices) const { | ||||
|     return internal::tensor_index_linearization_helper<DenseIndex, NumDims, NumDims - 1, true>::run(indices, *static_cast<const Base*>(this)); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| // Boilerplate | ||||
| namespace internal { | ||||
| template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor> | ||||
| struct tensor_vsize_index_linearization_helper | ||||
| { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   Index run(array<Index, NumIndices> const& indices, std::vector<DenseIndex> const& dimensions) | ||||
|   { | ||||
|     return array_get<RowMajor ? n : (NumIndices - n - 1)>(indices) + | ||||
|       array_get<RowMajor ? n : (NumIndices - n - 1)>(dimensions) * | ||||
|         tensor_vsize_index_linearization_helper<Index, NumIndices, n - 1, RowMajor>::run(indices, dimensions); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<typename Index, std::size_t NumIndices, bool RowMajor> | ||||
| struct tensor_vsize_index_linearization_helper<Index, NumIndices, 0, RowMajor> | ||||
| { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   Index run(array<Index, NumIndices> const& indices, std::vector<DenseIndex> const&) | ||||
|   { | ||||
|     return array_get<RowMajor ? 0 : NumIndices - 1>(indices); | ||||
|   } | ||||
| }; | ||||
| }  // end namespace internal | ||||
|  | ||||
|  | ||||
| namespace internal { | ||||
|  | ||||
| template <typename DenseIndex, int NumDims> struct array_size<const DSizes<DenseIndex, NumDims> > { | ||||
|   static const size_t value = NumDims; | ||||
| }; | ||||
| template <typename DenseIndex, int NumDims> struct array_size<DSizes<DenseIndex, NumDims> > { | ||||
|   static const size_t value = NumDims; | ||||
| }; | ||||
| #ifndef EIGEN_EMULATE_CXX11_META_H | ||||
| template <typename std::ptrdiff_t... Indices> struct array_size<const Sizes<Indices...> > { | ||||
| static const std::ptrdiff_t value = Sizes<Indices...>::count; | ||||
| }; | ||||
| template <typename std::ptrdiff_t... Indices> struct array_size<Sizes<Indices...> > { | ||||
| static const std::ptrdiff_t value = Sizes<Indices...>::count; | ||||
| }; | ||||
| template <std::ptrdiff_t n, typename std::ptrdiff_t... Indices> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<Indices...>&) { | ||||
|   return get<n, internal::numeric_list<std::size_t, Indices...> >::value; | ||||
| } | ||||
| template <std::ptrdiff_t n> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<>&) { | ||||
|   eigen_assert(false && "should never be called"); | ||||
|   return -1; | ||||
| } | ||||
| #else | ||||
| template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> struct array_size<const Sizes<V1,V2,V3,V4,V5> > { | ||||
|   static const size_t value = Sizes<V1,V2,V3,V4,V5>::count; | ||||
| }; | ||||
| template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> struct array_size<Sizes<V1,V2,V3,V4,V5> > { | ||||
|   static const size_t value = Sizes<V1,V2,V3,V4,V5>::count; | ||||
| }; | ||||
| template <std::size_t n, std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_get(const Sizes<V1,V2,V3,V4,V5>&) { | ||||
|   return get<n, typename Sizes<V1,V2,V3,V4,V5>::Base>::value; | ||||
| } | ||||
|  | ||||
| #endif | ||||
|  | ||||
|  | ||||
| template <typename Dims1, typename Dims2, size_t n, size_t m> | ||||
| struct sizes_match_below_dim { | ||||
|   static EIGEN_DEVICE_FUNC  inline bool run(Dims1&, Dims2&) { | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
| template <typename Dims1, typename Dims2, size_t n> | ||||
| struct sizes_match_below_dim<Dims1, Dims2, n, n> { | ||||
|   static EIGEN_DEVICE_FUNC  inline bool run(Dims1& dims1, Dims2& dims2) { | ||||
|     return (array_get<n-1>(dims1) == array_get<n-1>(dims2)) & | ||||
|         sizes_match_below_dim<Dims1, Dims2, n-1, n-1>::run(dims1, dims2); | ||||
|   } | ||||
| }; | ||||
| template <typename Dims1, typename Dims2> | ||||
| struct sizes_match_below_dim<Dims1, Dims2, 0, 0> { | ||||
|   static EIGEN_DEVICE_FUNC  inline bool run(Dims1&, Dims2&) { | ||||
|     return true; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| } // end namespace internal | ||||
|  | ||||
|  | ||||
| template <typename Dims1, typename Dims2> | ||||
| EIGEN_DEVICE_FUNC bool dimensions_match(Dims1& dims1, Dims2& dims2) { | ||||
|   return internal::sizes_match_below_dim<Dims1, Dims2, internal::array_size<Dims1>::value, internal::array_size<Dims2>::value>::run(dims1, dims2); | ||||
| } | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_DIMENSIONS_H | ||||
							
								
								
									
181  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h  (vendored, Normal file)
							| @@ -0,0 +1,181 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_EVAL_TO_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_EVAL_TO_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \class TensorEvalToOp | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Expression node that evaluates its nested expression and writes | ||||
|   * the result into a user-provided buffer. | ||||
|   * | ||||
|   */ | ||||
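| // Conceptual sketch (editor's illustration): the evaluator defined below | ||||
| // writes every coefficient of the wrapped expression into the buffer passed | ||||
| // at construction, roughly | ||||
| //   for (Index i = 0; i < size; ++i) buffer[i] = nested_evaluator.coeff(i); | ||||
| // with evalScalar()/evalPacket() performing one element, or one packet, of | ||||
| // that loop at a time. | ||||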
| namespace internal { | ||||
| template<typename XprType, template <class> class MakePointer_> | ||||
| struct traits<TensorEvalToOp<XprType, MakePointer_> > | ||||
| { | ||||
|   // Type promotion to handle the case where the types of the lhs and the rhs are different. | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef traits<XprType> XprTraits; | ||||
|   typedef typename XprTraits::StorageKind StorageKind; | ||||
|   typedef typename XprTraits::Index Index; | ||||
|   typedef typename XprType::Nested Nested; | ||||
|   typedef typename remove_reference<Nested>::type _Nested; | ||||
|   static const int NumDimensions = XprTraits::NumDimensions; | ||||
|   static const int Layout = XprTraits::Layout; | ||||
|  | ||||
|   enum { | ||||
|     Flags = 0 | ||||
|   }; | ||||
|   template <class T> | ||||
|   struct MakePointer { | ||||
|     // Intermediate typedef to workaround MSVC issue. | ||||
|     typedef MakePointer_<T> MakePointerT; | ||||
|     typedef typename MakePointerT::Type Type; | ||||
|   }; | ||||
| }; | ||||
|  | ||||
| template<typename XprType, template <class> class MakePointer_> | ||||
| struct eval<TensorEvalToOp<XprType, MakePointer_>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorEvalToOp<XprType, MakePointer_>& type; | ||||
| }; | ||||
|  | ||||
| template<typename XprType, template <class> class MakePointer_> | ||||
| struct nested<TensorEvalToOp<XprType, MakePointer_>, 1, typename eval<TensorEvalToOp<XprType, MakePointer_> >::type> | ||||
| { | ||||
|   typedef TensorEvalToOp<XprType, MakePointer_> type; | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| template<typename XprType, template <class> class MakePointer_> | ||||
| class TensorEvalToOp : public TensorBase<TensorEvalToOp<XprType, MakePointer_>, ReadOnlyAccessors> | ||||
| { | ||||
|   public: | ||||
|   typedef typename Eigen::internal::traits<TensorEvalToOp>::Scalar Scalar; | ||||
|   typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; | ||||
|   typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; | ||||
|   typedef typename MakePointer_<CoeffReturnType>::Type PointerType; | ||||
|   typedef typename Eigen::internal::nested<TensorEvalToOp>::type Nested; | ||||
|   typedef typename Eigen::internal::traits<TensorEvalToOp>::StorageKind StorageKind; | ||||
|   typedef typename Eigen::internal::traits<TensorEvalToOp>::Index Index; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvalToOp(PointerType buffer, const XprType& expr) | ||||
|       : m_xpr(expr), m_buffer(buffer) {} | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const typename internal::remove_all<typename XprType::Nested>::type& | ||||
|     expression() const { return m_xpr; } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC PointerType buffer() const { return m_buffer; } | ||||
|  | ||||
|   protected: | ||||
|     typename XprType::Nested m_xpr; | ||||
|     PointerType m_buffer; | ||||
| }; | ||||
|  | ||||
|  | ||||
|  | ||||
| template<typename ArgType, typename Device, template <class> class MakePointer_> | ||||
| struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device> | ||||
| { | ||||
|   typedef TensorEvalToOp<ArgType, MakePointer_> XprType; | ||||
|   typedef typename ArgType::Scalar Scalar; | ||||
|   typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions; | ||||
|   typedef typename XprType::Index Index; | ||||
|   typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, | ||||
|     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, | ||||
|     Layout = TensorEvaluator<ArgType, Device>::Layout, | ||||
|     CoordAccess = false,  // to be implemented | ||||
|     RawAccess = true | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) | ||||
|       : m_impl(op.expression(), device), m_device(device), | ||||
|           m_buffer(op.buffer()), m_op(op), m_expression(op.expression()) | ||||
|   { } | ||||
|  | ||||
|   // Used for accessor extraction in SYCL Managed TensorMap: | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const XprType& op() const { | ||||
|     return m_op; | ||||
|   } | ||||
|    | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~TensorEvaluator() { | ||||
|   } | ||||
|  | ||||
|   typedef typename internal::traits<const TensorEvalToOp<ArgType, MakePointer_> >::template MakePointer<CoeffReturnType>::Type DevicePointer; | ||||
|   EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(DevicePointer scalar) { | ||||
|     EIGEN_UNUSED_VARIABLE(scalar); | ||||
|     eigen_assert(scalar == NULL); | ||||
|     return m_impl.evalSubExprsIfNeeded(m_buffer); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalScalar(Index i) { | ||||
|     m_buffer[i] = m_impl.coeff(i); | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalPacket(Index i) { | ||||
|     internal::pstoret<CoeffReturnType, PacketReturnType, Aligned>(m_buffer + i, m_impl.template packet<TensorEvaluator<ArgType, Device>::IsAligned ? Aligned : Unaligned>(i)); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { | ||||
|     m_impl.cleanup(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const | ||||
|   { | ||||
|     return m_buffer[index]; | ||||
|   } | ||||
|  | ||||
|   template<int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const | ||||
|   { | ||||
|     return internal::ploadt<PacketReturnType, LoadMode>(m_buffer + index); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { | ||||
|     // We assume that evalPacket or evalScalar is called to perform the | ||||
|     // assignment and account for the cost of the write here. | ||||
|     return m_impl.costPerCoeff(vectorized) + | ||||
|         TensorOpCost(0, sizeof(CoeffReturnType), 0, vectorized, PacketSize); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC DevicePointer data() const { return m_buffer; } | ||||
|   ArgType expression() const { return m_expression; } | ||||
|  | ||||
|   /// required by sycl in order to extract the accessor | ||||
|   const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; } | ||||
|   /// added for sycl in order to construct the buffer from the sycl device | ||||
|   const Device& device() const{return m_device;} | ||||
|  | ||||
|  private: | ||||
|   TensorEvaluator<ArgType, Device> m_impl; | ||||
|   const Device& m_device; | ||||
|   DevicePointer m_buffer; | ||||
|   const XprType& m_op; | ||||
|   const ArgType m_expression; | ||||
| }; | ||||
|  | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_EVAL_TO_H | ||||
							
								
								
									
633  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h  (vendored, Normal file)
							| @@ -0,0 +1,633 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \class TensorEvaluator | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief The tensor evaluator classes. | ||||
|   * | ||||
|   * These classes are responsible for the evaluation of the tensor expression. | ||||
|   * | ||||
|   * TODO: add support for more types of expressions, in particular expressions | ||||
|   * leading to lvalues (slicing, reshaping, etc...) | ||||
|   */ | ||||
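|  | ||||
| // Simplified driver pattern (editor's sketch of how an executor typically | ||||
| // uses an evaluator; `Expr`, `expr` and `out` are assumed, error handling | ||||
| // omitted): | ||||
| // | ||||
| //   TensorEvaluator<const Expr, DefaultDevice> eval(expr, DefaultDevice()); | ||||
| //   if (eval.evalSubExprsIfNeeded(NULL)) { | ||||
| //     const Index size = internal::array_prod(eval.dimensions()); | ||||
| //     for (Index i = 0; i < size; ++i) out[i] = eval.coeff(i); | ||||
| //   } | ||||
| //   eval.cleanup(); | ||||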
|  | ||||
| // Generic evaluator | ||||
| template<typename Derived, typename Device> | ||||
| struct TensorEvaluator | ||||
| { | ||||
|   typedef typename Derived::Index Index; | ||||
|   typedef typename Derived::Scalar Scalar; | ||||
|   typedef typename Derived::Scalar CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   typedef typename Derived::Dimensions Dimensions; | ||||
|  | ||||
|   // NumDimensions is -1 for variable dim tensors | ||||
|   static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ? | ||||
|                                internal::traits<Derived>::NumDimensions : 0; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = Derived::IsAligned, | ||||
|     PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1), | ||||
|     Layout = Derived::Layout, | ||||
|     CoordAccess = NumCoords > 0, | ||||
|     RawAccess = true | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device) | ||||
|       : m_data(const_cast<typename internal::traits<Derived>::template MakePointer<Scalar>::Type>(m.data())), m_dims(m.dimensions()), m_device(device), m_impl(m) | ||||
|   { } | ||||
|  | ||||
|   // Used for accessor extraction in SYCL Managed TensorMap: | ||||
|   const Derived& derived() const { return m_impl; } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* dest) { | ||||
|     if (dest) { | ||||
|       m_device.memcpy((void*)dest, m_data, sizeof(Scalar) * m_dims.TotalSize()); | ||||
|       return false; | ||||
|     } | ||||
|     return true; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { | ||||
|     eigen_assert(m_data); | ||||
|     return m_data[index]; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { | ||||
|     eigen_assert(m_data); | ||||
|     return m_data[index]; | ||||
|   } | ||||
|  | ||||
|   template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   PacketReturnType packet(Index index) const | ||||
|   { | ||||
|     return internal::ploadt<PacketReturnType, LoadMode>(m_data + index); | ||||
|   } | ||||
|  | ||||
|   template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   void writePacket(Index index, const PacketReturnType& x) | ||||
|   { | ||||
|     return internal::pstoret<Scalar, PacketReturnType, StoreMode>(m_data + index, x); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const { | ||||
|     eigen_assert(m_data); | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       return m_data[m_dims.IndexOfColMajor(coords)]; | ||||
|     } else { | ||||
|       return m_data[m_dims.IndexOfRowMajor(coords)]; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(const array<DenseIndex, NumCoords>& coords) { | ||||
|     eigen_assert(m_data); | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       return m_data[m_dims.IndexOfColMajor(coords)]; | ||||
|     } else { | ||||
|       return m_data[m_dims.IndexOfRowMajor(coords)]; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { | ||||
|     return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, | ||||
|                         internal::unpacket_traits<PacketReturnType>::size); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC typename internal::traits<Derived>::template MakePointer<Scalar>::Type data() const { return m_data; } | ||||
|  | ||||
|   /// required by sycl in order to construct sycl buffer from raw pointer | ||||
|   const Device& device() const{return m_device;} | ||||
|  | ||||
|  protected: | ||||
|   typename internal::traits<Derived>::template MakePointer<Scalar>::Type m_data; | ||||
|   Dimensions m_dims; | ||||
|   const Device& m_device; | ||||
|   const Derived& m_impl; | ||||
| }; | ||||
|  | ||||
| namespace { | ||||
| template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE | ||||
| T loadConstant(const T* address) { | ||||
|   return *address; | ||||
| } | ||||
| // Use the texture cache on CUDA devices whenever possible | ||||
| #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 | ||||
| template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE | ||||
| float loadConstant(const float* address) { | ||||
|   return __ldg(address); | ||||
| } | ||||
| template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE | ||||
| double loadConstant(const double* address) { | ||||
|   return __ldg(address); | ||||
| } | ||||
| template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE | ||||
| Eigen::half loadConstant(const Eigen::half* address) { | ||||
|   return Eigen::half(half_impl::raw_uint16_to_half(__ldg(&address->x))); | ||||
| } | ||||
| #endif | ||||
| } | ||||
|  | ||||
|  | ||||
| // Default evaluator for rvalues | ||||
| template<typename Derived, typename Device> | ||||
| struct TensorEvaluator<const Derived, Device> | ||||
| { | ||||
|   typedef typename Derived::Index Index; | ||||
|   typedef typename Derived::Scalar Scalar; | ||||
|   typedef typename Derived::Scalar CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   typedef typename Derived::Dimensions Dimensions; | ||||
|  | ||||
|   // NumDimensions is -1 for variable dim tensors | ||||
|   static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ? | ||||
|                                internal::traits<Derived>::NumDimensions : 0; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = Derived::IsAligned, | ||||
|     PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1), | ||||
|     Layout = Derived::Layout, | ||||
|     CoordAccess = NumCoords > 0, | ||||
|     RawAccess = true | ||||
|   }; | ||||
|  | ||||
|   // Used for accessor extraction in SYCL Managed TensorMap: | ||||
|   const Derived& derived() const { return m_impl; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device) | ||||
|       : m_data(m.data()), m_dims(m.dimensions()), m_device(device), m_impl(m) | ||||
|   { } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { | ||||
|     if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data) { | ||||
|       m_device.memcpy((void*)data, m_data, m_dims.TotalSize() * sizeof(Scalar)); | ||||
|       return false; | ||||
|     } | ||||
|     return true; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { | ||||
|     eigen_assert(m_data); | ||||
|     return loadConstant(m_data+index); | ||||
|   } | ||||
|  | ||||
|   template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   PacketReturnType packet(Index index) const | ||||
|   { | ||||
|     return internal::ploadt_ro<PacketReturnType, LoadMode>(m_data + index); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const { | ||||
|     eigen_assert(m_data); | ||||
|     const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords) | ||||
|                         : m_dims.IndexOfRowMajor(coords); | ||||
|     return loadConstant(m_data+index); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { | ||||
|     return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, | ||||
|                         internal::unpacket_traits<PacketReturnType>::size); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC typename internal::traits<Derived>::template MakePointer<const Scalar>::Type data() const { return m_data; } | ||||
|  | ||||
|   /// added for sycl in order to construct the buffer from the sycl device | ||||
|   const Device& device() const{return m_device;} | ||||
|  | ||||
|  protected: | ||||
|   typename internal::traits<Derived>::template MakePointer<const Scalar>::Type m_data; | ||||
|   Dimensions m_dims; | ||||
|   const Device& m_device; | ||||
|   const Derived& m_impl; | ||||
| }; | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| // -------------------- CwiseNullaryOp -------------------- | ||||
|  | ||||
| template<typename NullaryOp, typename ArgType, typename Device> | ||||
| struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device> | ||||
| { | ||||
|   typedef TensorCwiseNullaryOp<NullaryOp, ArgType> XprType; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = true, | ||||
|     PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess, | ||||
|     Layout = TensorEvaluator<ArgType, Device>::Layout, | ||||
|     CoordAccess = false,  // to be implemented | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   TensorEvaluator(const XprType& op, const Device& device) | ||||
|       : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device), m_wrapper() | ||||
|   { } | ||||
|  | ||||
|   typedef typename XprType::Index Index; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename internal::traits<XprType>::Scalar CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|   typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { return true; } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const | ||||
|   { | ||||
|     return m_wrapper(m_functor, index); | ||||
|   } | ||||
|  | ||||
|   template<int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const | ||||
|   { | ||||
|     return m_wrapper.template packetOp<PacketReturnType, Index>(m_functor, index); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost | ||||
|   costPerCoeff(bool vectorized) const { | ||||
|     return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, | ||||
|                         internal::unpacket_traits<PacketReturnType>::size); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } | ||||
|  | ||||
|   /// required by sycl in order to extract the accessor | ||||
|   const TensorEvaluator<ArgType, Device>& impl() const { return m_argImpl; } | ||||
|   /// required by sycl in order to extract the accessor | ||||
|   NullaryOp functor() const { return m_functor; } | ||||
|  | ||||
|  | ||||
|  private: | ||||
|   const NullaryOp m_functor; | ||||
|   TensorEvaluator<ArgType, Device> m_argImpl; | ||||
|   const internal::nullary_wrapper<CoeffReturnType,NullaryOp> m_wrapper; | ||||
| }; | ||||
|  | ||||
|  | ||||
|  | ||||
| // -------------------- CwiseUnaryOp -------------------- | ||||
|  | ||||
| template<typename UnaryOp, typename ArgType, typename Device> | ||||
| struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device> | ||||
| { | ||||
|   typedef TensorCwiseUnaryOp<UnaryOp, ArgType> XprType; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, | ||||
|     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess & internal::functor_traits<UnaryOp>::PacketAccess, | ||||
|     Layout = TensorEvaluator<ArgType, Device>::Layout, | ||||
|     CoordAccess = false,  // to be implemented | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) | ||||
|     : m_functor(op.functor()), | ||||
|       m_argImpl(op.nestedExpression(), device) | ||||
|   { } | ||||
|  | ||||
|   typedef typename XprType::Index Index; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename internal::traits<XprType>::Scalar CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|   typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { | ||||
|     m_argImpl.evalSubExprsIfNeeded(NULL); | ||||
|     return true; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { | ||||
|     m_argImpl.cleanup(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const | ||||
|   { | ||||
|     return m_functor(m_argImpl.coeff(index)); | ||||
|   } | ||||
|  | ||||
|   template<int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const | ||||
|   { | ||||
|     return m_functor.packetOp(m_argImpl.template packet<LoadMode>(index)); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { | ||||
|     const double functor_cost = internal::functor_traits<UnaryOp>::Cost; | ||||
|     return m_argImpl.costPerCoeff(vectorized) + | ||||
|         TensorOpCost(0, 0, functor_cost, vectorized, PacketSize); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } | ||||
|  | ||||
|   /// required by sycl in order to extract the accessor | ||||
|   const TensorEvaluator<ArgType, Device> & impl() const { return m_argImpl; } | ||||
|   /// added for sycl in order to construct the buffer from sycl device | ||||
|   UnaryOp functor() const { return m_functor; } | ||||
|  | ||||
|  | ||||
|  private: | ||||
|   const UnaryOp m_functor; | ||||
|   TensorEvaluator<ArgType, Device> m_argImpl; | ||||
| }; | ||||
|  | ||||
|  | ||||
| // -------------------- CwiseBinaryOp -------------------- | ||||
|  | ||||
| template<typename BinaryOp, typename LeftArgType, typename RightArgType, typename Device> | ||||
| struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType>, Device> | ||||
| { | ||||
|   typedef TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> XprType; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = TensorEvaluator<LeftArgType, Device>::IsAligned & TensorEvaluator<RightArgType, Device>::IsAligned, | ||||
|     PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & TensorEvaluator<RightArgType, Device>::PacketAccess & | ||||
|                    internal::functor_traits<BinaryOp>::PacketAccess, | ||||
|     Layout = TensorEvaluator<LeftArgType, Device>::Layout, | ||||
|     CoordAccess = false,  // to be implemented | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) | ||||
|     : m_functor(op.functor()), | ||||
|       m_leftImpl(op.lhsExpression(), device), | ||||
|       m_rightImpl(op.rhsExpression(), device) | ||||
|   { | ||||
|     EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout) || internal::traits<XprType>::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|     eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions())); | ||||
|   } | ||||
|  | ||||
|   typedef typename XprType::Index Index; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename internal::traits<XprType>::Scalar CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|   typedef typename TensorEvaluator<LeftArgType, Device>::Dimensions Dimensions; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC const Dimensions& dimensions() const | ||||
|   { | ||||
|     // TODO: use right impl instead if right impl dimensions are known at compile time. | ||||
|     return m_leftImpl.dimensions(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { | ||||
|     m_leftImpl.evalSubExprsIfNeeded(NULL); | ||||
|     m_rightImpl.evalSubExprsIfNeeded(NULL); | ||||
|     return true; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { | ||||
|     m_leftImpl.cleanup(); | ||||
|     m_rightImpl.cleanup(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const | ||||
|   { | ||||
|     return m_functor(m_leftImpl.coeff(index), m_rightImpl.coeff(index)); | ||||
|   } | ||||
|   template<int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const | ||||
|   { | ||||
|     return m_functor.packetOp(m_leftImpl.template packet<LoadMode>(index), m_rightImpl.template packet<LoadMode>(index)); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost | ||||
|   costPerCoeff(bool vectorized) const { | ||||
|     const double functor_cost = internal::functor_traits<BinaryOp>::Cost; | ||||
|     return m_leftImpl.costPerCoeff(vectorized) + | ||||
|            m_rightImpl.costPerCoeff(vectorized) + | ||||
|            TensorOpCost(0, 0, functor_cost, vectorized, PacketSize); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } | ||||
|   /// required by sycl in order to extract the accessor | ||||
|   const TensorEvaluator<LeftArgType, Device>& left_impl() const { return m_leftImpl; } | ||||
|   /// required by sycl in order to extract the accessor | ||||
|   const TensorEvaluator<RightArgType, Device>& right_impl() const { return m_rightImpl; } | ||||
|   /// required by sycl in order to extract the accessor | ||||
|   BinaryOp functor() const { return m_functor; } | ||||
|  | ||||
|  private: | ||||
|   const BinaryOp m_functor; | ||||
|   TensorEvaluator<LeftArgType, Device> m_leftImpl; | ||||
|   TensorEvaluator<RightArgType, Device> m_rightImpl; | ||||
| }; | ||||
|  | ||||
| // -------------------- CwiseTernaryOp -------------------- | ||||
|  | ||||
| template<typename TernaryOp, typename Arg1Type, typename Arg2Type, typename Arg3Type, typename Device> | ||||
| struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type>, Device> | ||||
| { | ||||
|   typedef TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type> XprType; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = TensorEvaluator<Arg1Type, Device>::IsAligned & TensorEvaluator<Arg2Type, Device>::IsAligned & TensorEvaluator<Arg3Type, Device>::IsAligned, | ||||
|     PacketAccess = TensorEvaluator<Arg1Type, Device>::PacketAccess & TensorEvaluator<Arg2Type, Device>::PacketAccess & TensorEvaluator<Arg3Type, Device>::PacketAccess & | ||||
|                    internal::functor_traits<TernaryOp>::PacketAccess, | ||||
|     Layout = TensorEvaluator<Arg1Type, Device>::Layout, | ||||
|     CoordAccess = false,  // to be implemented | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) | ||||
|     : m_functor(op.functor()), | ||||
|       m_arg1Impl(op.arg1Expression(), device), | ||||
|       m_arg2Impl(op.arg2Expression(), device), | ||||
|       m_arg3Impl(op.arg3Expression(), device) | ||||
|   { | ||||
|     EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<Arg1Type, Device>::Layout) == static_cast<int>(TensorEvaluator<Arg3Type, Device>::Layout) || internal::traits<XprType>::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|  | ||||
|     EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind, | ||||
|                          typename internal::traits<Arg2Type>::StorageKind>::value), | ||||
|                         STORAGE_KIND_MUST_MATCH) | ||||
|     EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind, | ||||
|                          typename internal::traits<Arg3Type>::StorageKind>::value), | ||||
|                         STORAGE_KIND_MUST_MATCH) | ||||
|     EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index, | ||||
|                          typename internal::traits<Arg2Type>::Index>::value), | ||||
|                         STORAGE_INDEX_MUST_MATCH) | ||||
|     EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index, | ||||
|                          typename internal::traits<Arg3Type>::Index>::value), | ||||
|                         STORAGE_INDEX_MUST_MATCH) | ||||
|  | ||||
|     eigen_assert(dimensions_match(m_arg1Impl.dimensions(), m_arg2Impl.dimensions()) && dimensions_match(m_arg1Impl.dimensions(), m_arg3Impl.dimensions())); | ||||
|   } | ||||
|  | ||||
|   typedef typename XprType::Index Index; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename internal::traits<XprType>::Scalar CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|   typedef typename TensorEvaluator<Arg1Type, Device>::Dimensions Dimensions; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC const Dimensions& dimensions() const | ||||
|   { | ||||
|     // TODO: use arg2 or arg3 dimensions if they are known at compile time. | ||||
|     return m_arg1Impl.dimensions(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { | ||||
|     m_arg1Impl.evalSubExprsIfNeeded(NULL); | ||||
|     m_arg2Impl.evalSubExprsIfNeeded(NULL); | ||||
|     m_arg3Impl.evalSubExprsIfNeeded(NULL); | ||||
|     return true; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { | ||||
|     m_arg1Impl.cleanup(); | ||||
|     m_arg2Impl.cleanup(); | ||||
|     m_arg3Impl.cleanup(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const | ||||
|   { | ||||
|     return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index)); | ||||
|   } | ||||
|   template<int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const | ||||
|   { | ||||
|     return m_functor.packetOp(m_arg1Impl.template packet<LoadMode>(index), | ||||
|                               m_arg2Impl.template packet<LoadMode>(index), | ||||
|                               m_arg3Impl.template packet<LoadMode>(index)); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost | ||||
|   costPerCoeff(bool vectorized) const { | ||||
|     const double functor_cost = internal::functor_traits<TernaryOp>::Cost; | ||||
|     return m_arg1Impl.costPerCoeff(vectorized) + | ||||
|            m_arg2Impl.costPerCoeff(vectorized) + | ||||
|            m_arg3Impl.costPerCoeff(vectorized) + | ||||
|            TensorOpCost(0, 0, functor_cost, vectorized, PacketSize); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } | ||||
|  | ||||
|   /// required by sycl in order to extract the accessor | ||||
|   const TensorEvaluator<Arg1Type, Device> & arg1Impl() const { return m_arg1Impl; } | ||||
|   /// required by sycl in order to extract the accessor | ||||
|   const TensorEvaluator<Arg2Type, Device>& arg2Impl() const { return m_arg2Impl; } | ||||
|   /// required by sycl in order to extract the accessor | ||||
|   const TensorEvaluator<Arg3Type, Device>& arg3Impl() const { return m_arg3Impl; } | ||||
|  | ||||
|  private: | ||||
|   const TernaryOp m_functor; | ||||
|   TensorEvaluator<Arg1Type, Device> m_arg1Impl; | ||||
|   TensorEvaluator<Arg2Type, Device> m_arg2Impl; | ||||
|   TensorEvaluator<Arg3Type, Device> m_arg3Impl; | ||||
| }; | ||||
|  | ||||
|  | ||||
| // -------------------- SelectOp -------------------- | ||||
|  | ||||
| template<typename IfArgType, typename ThenArgType, typename ElseArgType, typename Device> | ||||
| struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>, Device> | ||||
| { | ||||
|   typedef TensorSelectOp<IfArgType, ThenArgType, ElseArgType> XprType; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = TensorEvaluator<ThenArgType, Device>::IsAligned & TensorEvaluator<ElseArgType, Device>::IsAligned, | ||||
|     PacketAccess = TensorEvaluator<ThenArgType, Device>::PacketAccess & TensorEvaluator<ElseArgType, Device>::PacketAccess & | ||||
|                    internal::packet_traits<Scalar>::HasBlend, | ||||
|     Layout = TensorEvaluator<IfArgType, Device>::Layout, | ||||
|     CoordAccess = false,  // to be implemented | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) | ||||
|     : m_condImpl(op.ifExpression(), device), | ||||
|       m_thenImpl(op.thenExpression(), device), | ||||
|       m_elseImpl(op.elseExpression(), device) | ||||
|   { | ||||
|     EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<IfArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<ThenArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|     EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<IfArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<ElseArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|     eigen_assert(dimensions_match(m_condImpl.dimensions(), m_thenImpl.dimensions())); | ||||
|     eigen_assert(dimensions_match(m_thenImpl.dimensions(), m_elseImpl.dimensions())); | ||||
|   } | ||||
|  | ||||
|   typedef typename XprType::Index Index; | ||||
|   typedef typename internal::traits<XprType>::Scalar CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|   typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC const Dimensions& dimensions() const | ||||
|   { | ||||
|     // TODO: use then or else impl instead if they happen to be known at compile time. | ||||
|     return m_condImpl.dimensions(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { | ||||
|     m_condImpl.evalSubExprsIfNeeded(NULL); | ||||
|     m_thenImpl.evalSubExprsIfNeeded(NULL); | ||||
|     m_elseImpl.evalSubExprsIfNeeded(NULL); | ||||
|     return true; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { | ||||
|     m_condImpl.cleanup(); | ||||
|     m_thenImpl.cleanup(); | ||||
|     m_elseImpl.cleanup(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const | ||||
|   { | ||||
|     return m_condImpl.coeff(index) ? m_thenImpl.coeff(index) : m_elseImpl.coeff(index); | ||||
|   } | ||||
|   template<int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const | ||||
|   { | ||||
|     internal::Selector<PacketSize> select; | ||||
|     for (Index i = 0; i < PacketSize; ++i) { | ||||
|       select.select[i] = m_condImpl.coeff(index+i); | ||||
|     } | ||||
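|     // pblend picks, lane by lane, the value from the 'then' packet where the | ||||
|     // selector is set and from the 'else' packet otherwise. | ||||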
|     return internal::pblend(select, | ||||
|                             m_thenImpl.template packet<LoadMode>(index), | ||||
|                             m_elseImpl.template packet<LoadMode>(index)); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost | ||||
|   costPerCoeff(bool vectorized) const { | ||||
|     return m_condImpl.costPerCoeff(vectorized) + | ||||
|            m_thenImpl.costPerCoeff(vectorized) | ||||
|         .cwiseMax(m_elseImpl.costPerCoeff(vectorized)); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const { return NULL; } | ||||
|   /// required by sycl in order to extract the accessor | ||||
|   const TensorEvaluator<IfArgType, Device> & cond_impl() const { return m_condImpl; } | ||||
|   /// required by sycl in order to extract the accessor | ||||
|   const TensorEvaluator<ThenArgType, Device>& then_impl() const { return m_thenImpl; } | ||||
|   /// required by sycl in order to extract the accessor | ||||
|   const TensorEvaluator<ElseArgType, Device>& else_impl() const { return m_elseImpl; } | ||||
|  | ||||
|  private: | ||||
|   TensorEvaluator<IfArgType, Device> m_condImpl; | ||||
|   TensorEvaluator<ThenArgType, Device> m_thenImpl; | ||||
|   TensorEvaluator<ElseArgType, Device> m_elseImpl; | ||||
| }; | ||||
|  | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H | ||||
							
								
								
									
288  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h  vendored  Normal file
							| @@ -0,0 +1,288 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \class TensorExecutor | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief The tensor executor class. | ||||
|   * | ||||
|   * This class is responsible for launching the evaluation of the expression on | ||||
|   * the specified computing device. | ||||
|   */ | ||||
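| // Illustrative sketch (assuming the usual Tensor API, not defined in this | ||||
| // file): the executor is normally reached through a tensor assignment, e.g. | ||||
| //   Eigen::Tensor<float, 2> a(64, 64), b(64, 64), out(64, 64); | ||||
| //   out = a + b;  // builds an assignment expression and hands it to | ||||
| //                 // internal::TensorExecutor<...>::run(expr, device) | ||||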
| namespace internal { | ||||
|  | ||||
| // Default strategy: the expression is evaluated with a single cpu thread. | ||||
| template<typename Expression, typename Device, bool Vectorizable> | ||||
| class TensorExecutor | ||||
| { | ||||
|  public: | ||||
|   typedef typename Expression::Index Index; | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   static inline void run(const Expression& expr, const Device& device = Device()) | ||||
|   { | ||||
|     TensorEvaluator<Expression, Device> evaluator(expr, device); | ||||
|     const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); | ||||
|     if (needs_assign) | ||||
|     { | ||||
|       const Index size = array_prod(evaluator.dimensions()); | ||||
|       for (Index i = 0; i < size; ++i) { | ||||
|         evaluator.evalScalar(i); | ||||
|       } | ||||
|     } | ||||
|     evaluator.cleanup(); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| template<typename Expression> | ||||
| class TensorExecutor<Expression, DefaultDevice, true> | ||||
| { | ||||
|  public: | ||||
|   typedef typename Expression::Index Index; | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   static inline void run(const Expression& expr, const DefaultDevice& device = DefaultDevice()) | ||||
|   { | ||||
|     TensorEvaluator<Expression, DefaultDevice> evaluator(expr, device); | ||||
|     const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); | ||||
|     if (needs_assign) | ||||
|     { | ||||
|       const Index size = array_prod(evaluator.dimensions()); | ||||
|       const int PacketSize = unpacket_traits<typename TensorEvaluator<Expression, DefaultDevice>::PacketReturnType>::size; | ||||
|       // Give the compiler a strong hint to unroll the loop. But don't insist | ||||
|       // on unrolling, because if the function is expensive the compiler should not | ||||
|       // unroll the loop at the expense of inlining. | ||||
|       const Index UnrolledSize = (size / (4 * PacketSize)) * 4 * PacketSize; | ||||
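|       // For example (illustrative): with size = 103 and PacketSize = 4, | ||||
|       // UnrolledSize = 96 and VectorizedSize = 100, so indices 0..95 are | ||||
|       // evaluated as unrolled packets, 96..99 as a single packet, and | ||||
|       // 100..102 as scalars. | ||||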
|       for (Index i = 0; i < UnrolledSize; i += 4*PacketSize) { | ||||
|         for (Index j = 0; j < 4; j++) { | ||||
|           evaluator.evalPacket(i + j * PacketSize); | ||||
|         } | ||||
|       } | ||||
|       const Index VectorizedSize = (size / PacketSize) * PacketSize; | ||||
|       for (Index i = UnrolledSize; i < VectorizedSize; i += PacketSize) { | ||||
|         evaluator.evalPacket(i); | ||||
|       } | ||||
|       for (Index i = VectorizedSize; i < size; ++i) { | ||||
|         evaluator.evalScalar(i); | ||||
|       } | ||||
|     } | ||||
|     evaluator.cleanup(); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
|  | ||||
| // Multicore strategy: the index space is partitioned and each partition is executed on a single core | ||||
| #ifdef EIGEN_USE_THREADS | ||||
| template <typename Evaluator, typename Index, bool Vectorizable> | ||||
| struct EvalRange { | ||||
|   static void run(Evaluator* evaluator_in, const Index first, const Index last) { | ||||
|     Evaluator evaluator = *evaluator_in; | ||||
|     eigen_assert(last >= first); | ||||
|     for (Index i = first; i < last; ++i) { | ||||
|       evaluator.evalScalar(i); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   static Index alignBlockSize(Index size) { | ||||
|     return size; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename Evaluator, typename Index> | ||||
| struct EvalRange<Evaluator, Index, true> { | ||||
|   static const int PacketSize = unpacket_traits<typename Evaluator::PacketReturnType>::size; | ||||
|  | ||||
|   static void run(Evaluator* evaluator_in, const Index first, const Index last) { | ||||
|     Evaluator evaluator = *evaluator_in; | ||||
|     eigen_assert(last >= first); | ||||
|     Index i = first; | ||||
|     if (last - first >= PacketSize) { | ||||
|       eigen_assert(first % PacketSize == 0); | ||||
|       Index last_chunk_offset = last - 4 * PacketSize; | ||||
|       // Give the compiler a strong hint to unroll the loop. But don't insist | ||||
|       // on unrolling, because if the function is expensive the compiler should not | ||||
|       // unroll the loop at the expense of inlining. | ||||
|       for (; i <= last_chunk_offset; i += 4*PacketSize) { | ||||
|         for (Index j = 0; j < 4; j++) { | ||||
|           evaluator.evalPacket(i + j * PacketSize); | ||||
|         } | ||||
|       } | ||||
|       last_chunk_offset = last - PacketSize; | ||||
|       for (; i <= last_chunk_offset; i += PacketSize) { | ||||
|         evaluator.evalPacket(i); | ||||
|       } | ||||
|     } | ||||
|     for (; i < last; ++i) { | ||||
|       evaluator.evalScalar(i); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   static Index alignBlockSize(Index size) { | ||||
|     // Align block size to packet size and account for unrolling in run above. | ||||
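|     // For example (illustrative): with PacketSize == 4, a size of 70 (>= 64) | ||||
|     // rounds up to 80, a multiple of 4 * PacketSize, while a size of 20 is | ||||
|     // only padded to a multiple of PacketSize and stays at 20. | ||||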
|     if (size >= 16 * PacketSize) { | ||||
|       return (size + 4 * PacketSize - 1) & ~(4 * PacketSize - 1); | ||||
|     } | ||||
|     // Aligning to 4 * PacketSize would increase block size by more than 25%. | ||||
|     return (size + PacketSize - 1) & ~(PacketSize - 1); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename Expression, bool Vectorizable> | ||||
| class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable> { | ||||
|  public: | ||||
|   typedef typename Expression::Index Index; | ||||
|   static inline void run(const Expression& expr, const ThreadPoolDevice& device) | ||||
|   { | ||||
|     typedef TensorEvaluator<Expression, ThreadPoolDevice> Evaluator; | ||||
|     Evaluator evaluator(expr, device); | ||||
|     const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); | ||||
|     if (needs_assign) | ||||
|     { | ||||
|       const Index size = array_prod(evaluator.dimensions()); | ||||
| #if !defined(EIGEN_USE_SIMPLE_THREAD_POOL) | ||||
|       device.parallelFor(size, evaluator.costPerCoeff(Vectorizable), | ||||
|                          EvalRange<Evaluator, Index, Vectorizable>::alignBlockSize, | ||||
|                          [&evaluator](Index first, Index last) { | ||||
|                            EvalRange<Evaluator, Index, Vectorizable>::run(&evaluator, first, last); | ||||
|                          }); | ||||
| #else | ||||
|       size_t num_threads = device.numThreads(); | ||||
|       if (num_threads > 1) { | ||||
|         num_threads = TensorCostModel<ThreadPoolDevice>::numThreads( | ||||
|             size, evaluator.costPerCoeff(Vectorizable), num_threads); | ||||
|       } | ||||
|       if (num_threads == 1) { | ||||
|         EvalRange<Evaluator, Index, Vectorizable>::run(&evaluator, 0, size); | ||||
|       } else { | ||||
|         const Index PacketSize = Vectorizable ? unpacket_traits<typename Evaluator::PacketReturnType>::size : 1; | ||||
|         Index blocksz = std::ceil<Index>(static_cast<float>(size)/num_threads) + PacketSize - 1; | ||||
|         const Index blocksize = numext::maxi<Index>(PacketSize, (blocksz - (blocksz % PacketSize))); | ||||
|         const Index numblocks = size / blocksize; | ||||
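|         // For example (illustrative): size = 1000, num_threads = 4 and | ||||
|         // PacketSize = 4 give blocksz = 253, blocksize = 252 and | ||||
|         // numblocks = 3; the remaining 244 coefficients are evaluated by the | ||||
|         // calling thread below. | ||||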
|  | ||||
|         Barrier barrier(numblocks); | ||||
|         for (int i = 0; i < numblocks; ++i) { | ||||
|           device.enqueue_with_barrier( | ||||
|               &barrier, &EvalRange<Evaluator, Index, Vectorizable>::run, | ||||
|               &evaluator, i * blocksize, (i + 1) * blocksize); | ||||
|         } | ||||
|         if (numblocks * blocksize < size) { | ||||
|           EvalRange<Evaluator, Index, Vectorizable>::run( | ||||
|               &evaluator, numblocks * blocksize, size); | ||||
|         } | ||||
|         barrier.Wait(); | ||||
|       } | ||||
| #endif  // defined(!EIGEN_USE_SIMPLE_THREAD_POOL) | ||||
|     } | ||||
|     evaluator.cleanup(); | ||||
|   } | ||||
| }; | ||||
| #endif  // EIGEN_USE_THREADS | ||||
|  | ||||
|  | ||||
| // GPU: the evaluation of the expression is offloaded to a GPU. | ||||
| #if defined(EIGEN_USE_GPU) | ||||
|  | ||||
| template <typename Expression, bool Vectorizable> | ||||
| class TensorExecutor<Expression, GpuDevice, Vectorizable> { | ||||
|  public: | ||||
|   typedef typename Expression::Index Index; | ||||
|   static void run(const Expression& expr, const GpuDevice& device); | ||||
| }; | ||||
|  | ||||
|  | ||||
| #if defined(__CUDACC__) | ||||
| template <typename Evaluator, typename Index, bool Vectorizable> | ||||
| struct EigenMetaKernelEval { | ||||
|   static __device__ EIGEN_ALWAYS_INLINE | ||||
|   void run(Evaluator& eval, Index first, Index last, Index step_size) { | ||||
|     for (Index i = first; i < last; i += step_size) { | ||||
|       eval.evalScalar(i); | ||||
|     } | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename Evaluator, typename Index> | ||||
| struct EigenMetaKernelEval<Evaluator, Index, true> { | ||||
|   static __device__ EIGEN_ALWAYS_INLINE | ||||
|   void run(Evaluator& eval, Index first, Index last, Index step_size) { | ||||
|     const Index PacketSize = unpacket_traits<typename Evaluator::PacketReturnType>::size; | ||||
|     const Index vectorized_size = (last / PacketSize) * PacketSize; | ||||
|     const Index vectorized_step_size = step_size * PacketSize; | ||||
|  | ||||
|     // Use the vector path | ||||
|     for (Index i = first * PacketSize; i < vectorized_size; | ||||
|          i += vectorized_step_size) { | ||||
|       eval.evalPacket(i); | ||||
|     } | ||||
|     for (Index i = vectorized_size + first; i < last; i += step_size) { | ||||
|       eval.evalScalar(i); | ||||
|     } | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename Evaluator, typename Index> | ||||
| __global__ void | ||||
| __launch_bounds__(1024) | ||||
| EigenMetaKernel(Evaluator eval, Index size) { | ||||
|  | ||||
|   const Index first_index = blockIdx.x * blockDim.x + threadIdx.x; | ||||
|   const Index step_size = blockDim.x * gridDim.x; | ||||
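|   // Grid-stride loop: each CUDA thread handles indices first_index, | ||||
|   // first_index + step_size, ... so any tensor size is covered regardless of | ||||
|   // the launch configuration. | ||||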
|  | ||||
|   const bool vectorizable = Evaluator::PacketAccess & Evaluator::IsAligned; | ||||
|   EigenMetaKernelEval<Evaluator, Index, vectorizable>::run(eval, first_index, size, step_size); | ||||
| } | ||||
|  | ||||
| /*static*/ | ||||
| template <typename Expression, bool Vectorizable> | ||||
| inline void TensorExecutor<Expression, GpuDevice, Vectorizable>::run( | ||||
|     const Expression& expr, const GpuDevice& device) { | ||||
|   TensorEvaluator<Expression, GpuDevice> evaluator(expr, device); | ||||
|   const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); | ||||
|   if (needs_assign) { | ||||
|     const int block_size = device.maxCudaThreadsPerBlock(); | ||||
|     const int max_blocks = device.getNumCudaMultiProcessors() * | ||||
|                            device.maxCudaThreadsPerMultiProcessor() / block_size; | ||||
|     const Index size = array_prod(evaluator.dimensions()); | ||||
|     // Create at least one block to ensure we won't crash when tensorflow calls with tensors of size 0. | ||||
|     const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, divup<int>(size, block_size)), 1); | ||||
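|     // For example (illustrative): with block_size = 1024 on a device with 16 | ||||
|     // multiprocessors of 2048 threads each, max_blocks = 32; a size-0 tensor | ||||
|     // still gets num_blocks = 1 so the launch remains valid. | ||||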
|  | ||||
|     LAUNCH_CUDA_KERNEL( | ||||
|         (EigenMetaKernel<TensorEvaluator<Expression, GpuDevice>, Index>), | ||||
|         num_blocks, block_size, 0, device, evaluator, size); | ||||
|   } | ||||
|   evaluator.cleanup(); | ||||
| } | ||||
|  | ||||
| #endif  // __CUDACC__ | ||||
| #endif  // EIGEN_USE_GPU | ||||
|  | ||||
| // SYCL Executor policy | ||||
| #ifdef EIGEN_USE_SYCL | ||||
|  | ||||
| template <typename Expression, bool Vectorizable> | ||||
| class TensorExecutor<Expression, SyclDevice, Vectorizable> { | ||||
| public: | ||||
|   static inline void run(const Expression &expr, const SyclDevice &device) { | ||||
|     // call TensorSYCL module | ||||
|     TensorSycl::run(expr, device); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| #endif | ||||
|  | ||||
| } // end namespace internal | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H | ||||
							
								
								
									
371  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h  vendored  Normal file
							| @@ -0,0 +1,371 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_EXPR_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_EXPR_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \class TensorExpr | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Tensor expression classes. | ||||
|   * | ||||
|   * The TensorCwiseNullaryOp class applies a nullary operator to an expression. | ||||
|   * This is typically used to generate constants. | ||||
|   * | ||||
|   * The TensorCwiseUnaryOp class represents an expression where a unary operator | ||||
|   * (e.g. cwiseSqrt) is applied to an expression. | ||||
|   * | ||||
|   * The TensorCwiseBinaryOp class represents an expression where a binary | ||||
|   * operator (e.g. addition) is applied to a lhs and a rhs expression. | ||||
|   * | ||||
|   */ | ||||
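| // Illustrative examples (assuming the usual Tensor API, not defined in this | ||||
| // file): a.constant(1.0f) yields a TensorCwiseNullaryOp, a.sqrt() a | ||||
| // TensorCwiseUnaryOp, a + b a TensorCwiseBinaryOp, and (a > b).select(a, b) | ||||
| // a TensorSelectOp. | ||||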
| namespace internal { | ||||
| template<typename NullaryOp, typename XprType> | ||||
| struct traits<TensorCwiseNullaryOp<NullaryOp, XprType> > | ||||
|     : traits<XprType> | ||||
| { | ||||
|   typedef traits<XprType> XprTraits; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename XprType::Nested XprTypeNested; | ||||
|   typedef typename remove_reference<XprTypeNested>::type _XprTypeNested; | ||||
|   static const int NumDimensions = XprTraits::NumDimensions; | ||||
|   static const int Layout = XprTraits::Layout; | ||||
|  | ||||
|   enum { | ||||
|     Flags = 0 | ||||
|   }; | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
|  | ||||
|  | ||||
| template<typename NullaryOp, typename XprType> | ||||
| class TensorCwiseNullaryOp : public TensorBase<TensorCwiseNullaryOp<NullaryOp, XprType>, ReadOnlyAccessors> | ||||
| { | ||||
|   public: | ||||
|     typedef typename Eigen::internal::traits<TensorCwiseNullaryOp>::Scalar Scalar; | ||||
|     typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; | ||||
|     typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|     typedef TensorCwiseNullaryOp<NullaryOp, XprType> Nested; | ||||
|     typedef typename Eigen::internal::traits<TensorCwiseNullaryOp>::StorageKind StorageKind; | ||||
|     typedef typename Eigen::internal::traits<TensorCwiseNullaryOp>::Index Index; | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseNullaryOp(const XprType& xpr, const NullaryOp& func = NullaryOp()) | ||||
|         : m_xpr(xpr), m_functor(func) {} | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const typename internal::remove_all<typename XprType::Nested>::type& | ||||
|     nestedExpression() const { return m_xpr; } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const NullaryOp& functor() const { return m_functor; } | ||||
|  | ||||
|   protected: | ||||
|     typename XprType::Nested m_xpr; | ||||
|     const NullaryOp m_functor; | ||||
| }; | ||||
|  | ||||
|  | ||||
|  | ||||
| namespace internal { | ||||
| template<typename UnaryOp, typename XprType> | ||||
| struct traits<TensorCwiseUnaryOp<UnaryOp, XprType> > | ||||
|     : traits<XprType> | ||||
| { | ||||
|   // TODO(phli): Add InputScalar, InputPacket.  Check references to | ||||
|   // current Scalar/Packet to see if the intent is Input or Output. | ||||
|   typedef typename result_of<UnaryOp(typename XprType::Scalar)>::type Scalar; | ||||
|   typedef traits<XprType> XprTraits; | ||||
|   typedef typename XprType::Nested XprTypeNested; | ||||
|   typedef typename remove_reference<XprTypeNested>::type _XprTypeNested; | ||||
|   static const int NumDimensions = XprTraits::NumDimensions; | ||||
|   static const int Layout = XprTraits::Layout; | ||||
| }; | ||||
|  | ||||
| template<typename UnaryOp, typename XprType> | ||||
| struct eval<TensorCwiseUnaryOp<UnaryOp, XprType>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorCwiseUnaryOp<UnaryOp, XprType>& type; | ||||
| }; | ||||
|  | ||||
| template<typename UnaryOp, typename XprType> | ||||
| struct nested<TensorCwiseUnaryOp<UnaryOp, XprType>, 1, typename eval<TensorCwiseUnaryOp<UnaryOp, XprType> >::type> | ||||
| { | ||||
|   typedef TensorCwiseUnaryOp<UnaryOp, XprType> type; | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
|  | ||||
|  | ||||
| template<typename UnaryOp, typename XprType> | ||||
| class TensorCwiseUnaryOp : public TensorBase<TensorCwiseUnaryOp<UnaryOp, XprType>, ReadOnlyAccessors> | ||||
| { | ||||
|   public: | ||||
|     // TODO(phli): Add InputScalar, InputPacket.  Check references to | ||||
|     // current Scalar/Packet to see if the intent is Input or Output. | ||||
|     typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::Scalar Scalar; | ||||
|     typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; | ||||
|     typedef Scalar CoeffReturnType; | ||||
|     typedef typename Eigen::internal::nested<TensorCwiseUnaryOp>::type Nested; | ||||
|     typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::StorageKind StorageKind; | ||||
|     typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::Index Index; | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp()) | ||||
|       : m_xpr(xpr), m_functor(func) {} | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const UnaryOp& functor() const { return m_functor; } | ||||
|  | ||||
|     /** \returns the nested expression */ | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const typename internal::remove_all<typename XprType::Nested>::type& | ||||
|     nestedExpression() const { return m_xpr; } | ||||
|  | ||||
|   protected: | ||||
|     typename XprType::Nested m_xpr; | ||||
|     const UnaryOp m_functor; | ||||
| }; | ||||
|  | ||||
|  | ||||
| namespace internal { | ||||
| template<typename BinaryOp, typename LhsXprType, typename RhsXprType> | ||||
| struct traits<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType> > | ||||
| { | ||||
|   // Type promotion to handle the case where the types of the lhs and the rhs | ||||
|   // are different. | ||||
|   // TODO(phli): Add Lhs/RhsScalar, Lhs/RhsPacket.  Check references to | ||||
|   // current Scalar/Packet to see if the intent is Inputs or Output. | ||||
|   typedef typename result_of< | ||||
|       BinaryOp(typename LhsXprType::Scalar, | ||||
|                typename RhsXprType::Scalar)>::type Scalar; | ||||
|   typedef traits<LhsXprType> XprTraits; | ||||
|   typedef typename promote_storage_type< | ||||
|       typename traits<LhsXprType>::StorageKind, | ||||
|       typename traits<RhsXprType>::StorageKind>::ret StorageKind; | ||||
|   typedef typename promote_index_type< | ||||
|       typename traits<LhsXprType>::Index, | ||||
|       typename traits<RhsXprType>::Index>::type Index; | ||||
|   typedef typename LhsXprType::Nested LhsNested; | ||||
|   typedef typename RhsXprType::Nested RhsNested; | ||||
|   typedef typename remove_reference<LhsNested>::type _LhsNested; | ||||
|   typedef typename remove_reference<RhsNested>::type _RhsNested; | ||||
|   static const int NumDimensions = XprTraits::NumDimensions; | ||||
|   static const int Layout = XprTraits::Layout; | ||||
|  | ||||
|   enum { | ||||
|     Flags = 0 | ||||
|   }; | ||||
| }; | ||||
|  | ||||
| template<typename BinaryOp, typename LhsXprType, typename RhsXprType> | ||||
| struct eval<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType>& type; | ||||
| }; | ||||
|  | ||||
| template<typename BinaryOp, typename LhsXprType, typename RhsXprType> | ||||
| struct nested<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType>, 1, typename eval<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType> >::type> | ||||
| { | ||||
|   typedef TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType> type; | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
|  | ||||
|  | ||||
| template<typename BinaryOp, typename LhsXprType, typename RhsXprType> | ||||
| class TensorCwiseBinaryOp : public TensorBase<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType>, ReadOnlyAccessors> | ||||
| { | ||||
|   public: | ||||
|     // TODO(phli): Add Lhs/RhsScalar, Lhs/RhsPacket.  Check references to | ||||
|     // current Scalar/Packet to see if the intent is Inputs or Output. | ||||
|     typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::Scalar Scalar; | ||||
|     typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; | ||||
|     typedef Scalar CoeffReturnType; | ||||
|     typedef typename Eigen::internal::nested<TensorCwiseBinaryOp>::type Nested; | ||||
|     typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::StorageKind StorageKind; | ||||
|     typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::Index Index; | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseBinaryOp(const LhsXprType& lhs, const RhsXprType& rhs, const BinaryOp& func = BinaryOp()) | ||||
|         : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_functor(func) {} | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const BinaryOp& functor() const { return m_functor; } | ||||
|  | ||||
|     /** \returns the nested expressions */ | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const typename internal::remove_all<typename LhsXprType::Nested>::type& | ||||
|     lhsExpression() const { return m_lhs_xpr; } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const typename internal::remove_all<typename RhsXprType::Nested>::type& | ||||
|     rhsExpression() const { return m_rhs_xpr; } | ||||
|  | ||||
|   protected: | ||||
|     typename LhsXprType::Nested m_lhs_xpr; | ||||
|     typename RhsXprType::Nested m_rhs_xpr; | ||||
|     const BinaryOp m_functor; | ||||
| }; | ||||
|  | ||||
|  | ||||
| namespace internal { | ||||
| template<typename TernaryOp, typename Arg1XprType, typename Arg2XprType, typename Arg3XprType> | ||||
| struct traits<TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprType> > | ||||
| { | ||||
|   // Type promotion to handle the case where the types of the args are different. | ||||
|   typedef typename result_of< | ||||
|       TernaryOp(typename Arg1XprType::Scalar, | ||||
|                 typename Arg2XprType::Scalar, | ||||
|                 typename Arg3XprType::Scalar)>::type Scalar; | ||||
|   typedef traits<Arg1XprType> XprTraits; | ||||
|   typedef typename traits<Arg1XprType>::StorageKind StorageKind; | ||||
|   typedef typename traits<Arg1XprType>::Index Index; | ||||
|   typedef typename Arg1XprType::Nested Arg1Nested; | ||||
|   typedef typename Arg2XprType::Nested Arg2Nested; | ||||
|   typedef typename Arg3XprType::Nested Arg3Nested; | ||||
|   typedef typename remove_reference<Arg1Nested>::type _Arg1Nested; | ||||
|   typedef typename remove_reference<Arg2Nested>::type _Arg2Nested; | ||||
|   typedef typename remove_reference<Arg3Nested>::type _Arg3Nested; | ||||
|   static const int NumDimensions = XprTraits::NumDimensions; | ||||
|   static const int Layout = XprTraits::Layout; | ||||
|  | ||||
|   enum { | ||||
|     Flags = 0 | ||||
|   }; | ||||
| }; | ||||
|  | ||||
| template<typename TernaryOp, typename Arg1XprType, typename Arg2XprType, typename Arg3XprType> | ||||
| struct eval<TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprType>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprType>& type; | ||||
| }; | ||||
|  | ||||
| template<typename TernaryOp, typename Arg1XprType, typename Arg2XprType, typename Arg3XprType> | ||||
| struct nested<TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprType>, 1, typename eval<TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprType> >::type> | ||||
| { | ||||
|   typedef TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprType> type; | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
|  | ||||
|  | ||||
| template<typename TernaryOp, typename Arg1XprType, typename Arg2XprType, typename Arg3XprType> | ||||
| class TensorCwiseTernaryOp : public TensorBase<TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprType>, ReadOnlyAccessors> | ||||
| { | ||||
|   public: | ||||
|     typedef typename Eigen::internal::traits<TensorCwiseTernaryOp>::Scalar Scalar; | ||||
|     typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; | ||||
|     typedef Scalar CoeffReturnType; | ||||
|     typedef typename Eigen::internal::nested<TensorCwiseTernaryOp>::type Nested; | ||||
|     typedef typename Eigen::internal::traits<TensorCwiseTernaryOp>::StorageKind StorageKind; | ||||
|     typedef typename Eigen::internal::traits<TensorCwiseTernaryOp>::Index Index; | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseTernaryOp(const Arg1XprType& arg1, const Arg2XprType& arg2, const Arg3XprType& arg3, const TernaryOp& func = TernaryOp()) | ||||
|         : m_arg1_xpr(arg1), m_arg2_xpr(arg2), m_arg3_xpr(arg3), m_functor(func) {} | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const TernaryOp& functor() const { return m_functor; } | ||||
|  | ||||
|     /** \returns the nested expressions */ | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const typename internal::remove_all<typename Arg1XprType::Nested>::type& | ||||
|     arg1Expression() const { return m_arg1_xpr; } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const typename internal::remove_all<typename Arg2XprType::Nested>::type& | ||||
|     arg2Expression() const { return m_arg2_xpr; } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const typename internal::remove_all<typename Arg3XprType::Nested>::type& | ||||
|     arg3Expression() const { return m_arg3_xpr; } | ||||
|  | ||||
|   protected: | ||||
|     typename Arg1XprType::Nested m_arg1_xpr; | ||||
|     typename Arg2XprType::Nested m_arg2_xpr; | ||||
|     typename Arg3XprType::Nested m_arg3_xpr; | ||||
|     const TernaryOp m_functor; | ||||
| }; | ||||
|  | ||||
|  | ||||
| namespace internal { | ||||
| template<typename IfXprType, typename ThenXprType, typename ElseXprType> | ||||
| struct traits<TensorSelectOp<IfXprType, ThenXprType, ElseXprType> > | ||||
|     : traits<ThenXprType> | ||||
| { | ||||
|   typedef typename traits<ThenXprType>::Scalar Scalar; | ||||
|   typedef traits<ThenXprType> XprTraits; | ||||
|   typedef typename promote_storage_type<typename traits<ThenXprType>::StorageKind, | ||||
|                                         typename traits<ElseXprType>::StorageKind>::ret StorageKind; | ||||
|   typedef typename promote_index_type<typename traits<ElseXprType>::Index, | ||||
|                                       typename traits<ThenXprType>::Index>::type Index; | ||||
|   typedef typename IfXprType::Nested IfNested; | ||||
|   typedef typename ThenXprType::Nested ThenNested; | ||||
|   typedef typename ElseXprType::Nested ElseNested; | ||||
|   static const int NumDimensions = XprTraits::NumDimensions; | ||||
|   static const int Layout = XprTraits::Layout; | ||||
| }; | ||||
|  | ||||
| template<typename IfXprType, typename ThenXprType, typename ElseXprType> | ||||
| struct eval<TensorSelectOp<IfXprType, ThenXprType, ElseXprType>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorSelectOp<IfXprType, ThenXprType, ElseXprType>& type; | ||||
| }; | ||||
|  | ||||
| template<typename IfXprType, typename ThenXprType, typename ElseXprType> | ||||
| struct nested<TensorSelectOp<IfXprType, ThenXprType, ElseXprType>, 1, typename eval<TensorSelectOp<IfXprType, ThenXprType, ElseXprType> >::type> | ||||
| { | ||||
|   typedef TensorSelectOp<IfXprType, ThenXprType, ElseXprType> type; | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
|  | ||||
| template<typename IfXprType, typename ThenXprType, typename ElseXprType> | ||||
| class TensorSelectOp : public TensorBase<TensorSelectOp<IfXprType, ThenXprType, ElseXprType>, ReadOnlyAccessors> | ||||
| { | ||||
|   public: | ||||
|     typedef typename Eigen::internal::traits<TensorSelectOp>::Scalar Scalar; | ||||
|     typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; | ||||
|     typedef typename internal::promote_storage_type<typename ThenXprType::CoeffReturnType, | ||||
|                                                     typename ElseXprType::CoeffReturnType>::ret CoeffReturnType; | ||||
|     typedef typename Eigen::internal::nested<TensorSelectOp>::type Nested; | ||||
|     typedef typename Eigen::internal::traits<TensorSelectOp>::StorageKind StorageKind; | ||||
|     typedef typename Eigen::internal::traits<TensorSelectOp>::Index Index; | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     TensorSelectOp(const IfXprType& a_condition, | ||||
|                    const ThenXprType& a_then, | ||||
|                    const ElseXprType& a_else) | ||||
|       : m_condition(a_condition), m_then(a_then), m_else(a_else) | ||||
|     { } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const IfXprType& ifExpression() const { return m_condition; } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const ThenXprType& thenExpression() const { return m_then; } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const ElseXprType& elseExpression() const { return m_else; } | ||||
|  | ||||
|   protected: | ||||
|     typename IfXprType::Nested m_condition; | ||||
|     typename ThenXprType::Nested m_then; | ||||
|     typename ElseXprType::Nested m_else; | ||||
| }; | ||||
|  | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_EXPR_H | ||||
							
								
								
									
651  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h  vendored  Normal file
							| @@ -0,0 +1,651 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2015 Jianwei Cui <thucjw@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_FFT_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_FFT_H | ||||
|  | ||||
| // This code requires the ability to initialize arrays of constant | ||||
| // values directly inside a class. | ||||
| #if __cplusplus >= 201103L || EIGEN_COMP_MSVC >= 1900 | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \class TensorFFT | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Tensor FFT class. | ||||
|   * | ||||
|   * TODO: | ||||
|   * Vectorize the Cooley-Tukey and the Bluestein algorithms | ||||
|   * Add support for multithreaded evaluation | ||||
|   * Improve the performance on GPU | ||||
|   */ | ||||
|  | ||||
| template <bool NeedUprade> struct MakeComplex { | ||||
|   template <typename T> | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   T operator() (const T& val) const { return val; } | ||||
| }; | ||||
|  | ||||
| template <> struct MakeComplex<true> { | ||||
|   template <typename T> | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   std::complex<T> operator() (const T& val) const { return std::complex<T>(val, 0); } | ||||
| }; | ||||
|  | ||||
| template <> struct MakeComplex<false> { | ||||
|   template <typename T> | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   std::complex<T> operator() (const std::complex<T>& val) const { return val; } | ||||
| }; | ||||
|  | ||||
| template <int ResultType> struct PartOf { | ||||
|   template <typename T> T operator() (const T& val) const { return val; } | ||||
| }; | ||||
|  | ||||
| template <> struct PartOf<RealPart> { | ||||
|   template <typename T> T operator() (const std::complex<T>& val) const { return val.real(); } | ||||
| }; | ||||
|  | ||||
| template <> struct PartOf<ImagPart> { | ||||
|   template <typename T> T operator() (const std::complex<T>& val) const { return val.imag(); } | ||||
| }; | ||||
|  | ||||
| namespace internal { | ||||
| template <typename FFT, typename XprType, int FFTResultType, int FFTDir> | ||||
| struct traits<TensorFFTOp<FFT, XprType, FFTResultType, FFTDir> > : public traits<XprType> { | ||||
|   typedef traits<XprType> XprTraits; | ||||
|   typedef typename NumTraits<typename XprTraits::Scalar>::Real RealScalar; | ||||
|   typedef typename std::complex<RealScalar> ComplexScalar; | ||||
|   typedef typename XprTraits::Scalar InputScalar; | ||||
|   typedef typename conditional<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>::type OutputScalar; | ||||
|   typedef typename XprTraits::StorageKind StorageKind; | ||||
|   typedef typename XprTraits::Index Index; | ||||
|   typedef typename XprType::Nested Nested; | ||||
|   typedef typename remove_reference<Nested>::type _Nested; | ||||
|   static const int NumDimensions = XprTraits::NumDimensions; | ||||
|   static const int Layout = XprTraits::Layout; | ||||
| }; | ||||
|  | ||||
| template <typename FFT, typename XprType, int FFTResultType, int FFTDirection> | ||||
| struct eval<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>, Eigen::Dense> { | ||||
|   typedef const TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>& type; | ||||
| }; | ||||
|  | ||||
| template <typename FFT, typename XprType, int FFTResultType, int FFTDirection> | ||||
| struct nested<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>, 1, typename eval<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection> >::type> { | ||||
|   typedef TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection> type; | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
| template <typename FFT, typename XprType, int FFTResultType, int FFTDir> | ||||
| class TensorFFTOp : public TensorBase<TensorFFTOp<FFT, XprType, FFTResultType, FFTDir>, ReadOnlyAccessors> { | ||||
|  public: | ||||
|   typedef typename Eigen::internal::traits<TensorFFTOp>::Scalar Scalar; | ||||
|   typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; | ||||
|   typedef typename std::complex<RealScalar> ComplexScalar; | ||||
|   typedef typename internal::conditional<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>::type OutputScalar; | ||||
|   typedef OutputScalar CoeffReturnType; | ||||
|   typedef typename Eigen::internal::nested<TensorFFTOp>::type Nested; | ||||
|   typedef typename Eigen::internal::traits<TensorFFTOp>::StorageKind StorageKind; | ||||
|   typedef typename Eigen::internal::traits<TensorFFTOp>::Index Index; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorFFTOp(const XprType& expr, const FFT& fft) | ||||
|       : m_xpr(expr), m_fft(fft) {} | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   const FFT& fft() const { return m_fft; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   const typename internal::remove_all<typename XprType::Nested>::type& expression() const { | ||||
|     return m_xpr; | ||||
|   } | ||||
|  | ||||
|  protected: | ||||
|   typename XprType::Nested m_xpr; | ||||
|   const FFT m_fft; | ||||
| }; | ||||
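| // Illustrative usage (assuming the usual Tensor API, not defined in this file): | ||||
| //   Eigen::array<int, 1> fft_dims; | ||||
| //   fft_dims[0] = 0;  // transform along the first dimension | ||||
| //   auto spectrum = input.fft<Eigen::BothParts, Eigen::FFT_FORWARD>(fft_dims); | ||||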
|  | ||||
| // Eval as rvalue | ||||
| template <typename FFT, typename ArgType, typename Device, int FFTResultType, int FFTDir> | ||||
| struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, Device> { | ||||
|   typedef TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir> XprType; | ||||
|   typedef typename XprType::Index Index; | ||||
|   static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; | ||||
|   typedef DSizes<Index, NumDims> Dimensions; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; | ||||
|   typedef typename std::complex<RealScalar> ComplexScalar; | ||||
|   typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions; | ||||
|   typedef internal::traits<XprType> XprTraits; | ||||
|   typedef typename XprTraits::Scalar InputScalar; | ||||
|   typedef typename internal::conditional<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>::type OutputScalar; | ||||
|   typedef OutputScalar CoeffReturnType; | ||||
|   typedef typename PacketType<OutputScalar, Device>::type PacketReturnType; | ||||
|   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = false, | ||||
|     PacketAccess = true, | ||||
|     BlockAccess = false, | ||||
|     Layout = TensorEvaluator<ArgType, Device>::Layout, | ||||
|     CoordAccess = false, | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_fft(op.fft()), m_impl(op.expression(), device), m_data(NULL), m_device(device) { | ||||
|     const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); | ||||
|     for (int i = 0; i < NumDims; ++i) { | ||||
|       eigen_assert(input_dims[i] > 0); | ||||
|       m_dimensions[i] = input_dims[i]; | ||||
|     } | ||||
|  | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       m_strides[0] = 1; | ||||
|       for (int i = 1; i < NumDims; ++i) { | ||||
|         m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1]; | ||||
|       } | ||||
|     } else { | ||||
|       m_strides[NumDims - 1] = 1; | ||||
|       for (int i = NumDims - 2; i >= 0; --i) { | ||||
|         m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1]; | ||||
|       } | ||||
|     } | ||||
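|     // For example (illustrative): dimensions {2, 3, 4} give strides {1, 2, 6} | ||||
|     // in ColMajor layout and {12, 4, 1} in RowMajor layout. | ||||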
|     m_size = m_dimensions.TotalSize(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { | ||||
|     return m_dimensions; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(OutputScalar* data) { | ||||
|     m_impl.evalSubExprsIfNeeded(NULL); | ||||
|     if (data) { | ||||
|       evalToBuf(data); | ||||
|       return false; | ||||
|     } else { | ||||
|       m_data = (CoeffReturnType*)m_device.allocate(sizeof(CoeffReturnType) * m_size); | ||||
|       evalToBuf(m_data); | ||||
|       return true; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { | ||||
|     if (m_data) { | ||||
|       m_device.deallocate(m_data); | ||||
|       m_data = NULL; | ||||
|     } | ||||
|     m_impl.cleanup(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const { | ||||
|     return m_data[index]; | ||||
|   } | ||||
|  | ||||
|   template <int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType | ||||
|   packet(Index index) const { | ||||
|     return internal::ploadt<PacketReturnType, LoadMode>(m_data + index); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost | ||||
|   costPerCoeff(bool vectorized) const { | ||||
|     return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC Scalar* data() const { return m_data; } | ||||
|  | ||||
|  | ||||
|  private: | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalToBuf(OutputScalar* data) { | ||||
|     const bool write_to_out = internal::is_same<OutputScalar, ComplexScalar>::value; | ||||
|     ComplexScalar* buf = write_to_out ? (ComplexScalar*)data : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * m_size); | ||||
|  | ||||
|     for (Index i = 0; i < m_size; ++i) { | ||||
|       buf[i] = MakeComplex<internal::is_same<InputScalar, RealScalar>::value>()(m_impl.coeff(i)); | ||||
|     } | ||||
|  | ||||
|     for (size_t i = 0; i < m_fft.size(); ++i) { | ||||
|       Index dim = m_fft[i]; | ||||
|       eigen_assert(dim >= 0 && dim < NumDims); | ||||
|       Index line_len = m_dimensions[dim]; | ||||
|       eigen_assert(line_len >= 1); | ||||
|       ComplexScalar* line_buf = (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * line_len); | ||||
|       const bool is_power_of_two = isPowerOfTwo(line_len); | ||||
|       const Index good_composite = is_power_of_two ? 0 : findGoodComposite(line_len); | ||||
|       const Index log_len = is_power_of_two ? getLog2(line_len) : getLog2(good_composite); | ||||
|  | ||||
|       ComplexScalar* a = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite); | ||||
|       ComplexScalar* b = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite); | ||||
|       ComplexScalar* pos_j_base_powered = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * (line_len + 1)); | ||||
|       if (!is_power_of_two) { | ||||
|         // Compute twiddle factors | ||||
|         //   t_n = exp(sqrt(-1) * pi * n^2 / line_len) | ||||
|         // for n = 0, 1,..., line_len-1. | ||||
|         // For n > 2 we use the recurrence t_n = t_{n-1}^2 / t_{n-2} * t_1^2 | ||||
|         pos_j_base_powered[0] = ComplexScalar(1, 0); | ||||
|         if (line_len > 1) { | ||||
|           const RealScalar pi_over_len(EIGEN_PI / line_len); | ||||
|           const ComplexScalar pos_j_base = ComplexScalar(std::cos(pi_over_len), std::sin(pi_over_len)); | ||||
|           pos_j_base_powered[1] = pos_j_base; | ||||
|           if (line_len > 2) { | ||||
|             const ComplexScalar pos_j_base_sq = pos_j_base * pos_j_base; | ||||
|             for (int j = 2; j < line_len + 1; ++j) { | ||||
|               pos_j_base_powered[j] = pos_j_base_powered[j - 1] * | ||||
|                                       pos_j_base_powered[j - 1] / | ||||
|                                       pos_j_base_powered[j - 2] * pos_j_base_sq; | ||||
|             } | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|  | ||||
|       for (Index partial_index = 0; partial_index < m_size / line_len; ++partial_index) { | ||||
|         const Index base_offset = getBaseOffsetFromIndex(partial_index, dim); | ||||
|  | ||||
|         // get data into line_buf | ||||
|         const Index stride = m_strides[dim]; | ||||
|         if (stride == 1) { | ||||
|           memcpy(line_buf, &buf[base_offset], line_len*sizeof(ComplexScalar)); | ||||
|         } else { | ||||
|           Index offset = base_offset; | ||||
|           for (int j = 0; j < line_len; ++j, offset += stride) { | ||||
|             line_buf[j] = buf[offset]; | ||||
|           } | ||||
|         } | ||||
|  | ||||
|         // process the line | ||||
|         if (is_power_of_two) { | ||||
|           processDataLineCooleyTukey(line_buf, line_len, log_len); | ||||
|         } | ||||
|         else { | ||||
|           processDataLineBluestein(line_buf, line_len, good_composite, log_len, a, b, pos_j_base_powered); | ||||
|         } | ||||
|  | ||||
|         // write back | ||||
|         if (FFTDir == FFT_FORWARD && stride == 1) { | ||||
|           memcpy(&buf[base_offset], line_buf, line_len*sizeof(ComplexScalar)); | ||||
|         } else { | ||||
|           Index offset = base_offset; | ||||
|           const ComplexScalar div_factor =  ComplexScalar(1.0 / line_len, 0); | ||||
|           for (int j = 0; j < line_len; ++j, offset += stride) { | ||||
|              buf[offset] = (FFTDir == FFT_FORWARD) ? line_buf[j] : line_buf[j] * div_factor; | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|       m_device.deallocate(line_buf); | ||||
|       if (!is_power_of_two) { | ||||
|         m_device.deallocate(a); | ||||
|         m_device.deallocate(b); | ||||
|         m_device.deallocate(pos_j_base_powered); | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     if(!write_to_out) { | ||||
|       for (Index i = 0; i < m_size; ++i) { | ||||
|         data[i] = PartOf<FFTResultType>()(buf[i]); | ||||
|       } | ||||
|       m_device.deallocate(buf); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static bool isPowerOfTwo(Index x) { | ||||
|     eigen_assert(x > 0); | ||||
|     return !(x & (x - 1)); | ||||
|   } | ||||
|  | ||||
|   // Returns the smallest power of two (>= 2) that is at least 2 * n - 1 (e.g. n = 1000 -> 2048); used as the padding length in Bluestein's FFT algorithm | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index findGoodComposite(Index n) { | ||||
|     Index i = 2; | ||||
|     while (i < 2 * n - 1) i *= 2; | ||||
|     return i; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index getLog2(Index m) { | ||||
|     Index log2m = 0; | ||||
|     while (m >>= 1) log2m++; | ||||
|     return log2m; | ||||
|   } | ||||
|  | ||||
|   // Call the Cooley-Tukey algorithm directly; the data length must be a power of two | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineCooleyTukey(ComplexScalar* line_buf, Index line_len, Index log_len) { | ||||
|     eigen_assert(isPowerOfTwo(line_len)); | ||||
|     scramble_FFT(line_buf, line_len); | ||||
|     compute_1D_Butterfly<FFTDir>(line_buf, line_len, log_len); | ||||
|   } | ||||
|  | ||||
|   // Call Bluestein's FFT algorithm; good_composite (m) is a power of two >= 2 * n - 1 used as the padding length (the identity behind this is sketched after this function) | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineBluestein(ComplexScalar* line_buf, Index line_len, Index good_composite, Index log_len, ComplexScalar* a, ComplexScalar* b, const ComplexScalar* pos_j_base_powered) { | ||||
|     Index n = line_len; | ||||
|     Index m = good_composite; | ||||
|     ComplexScalar* data = line_buf; | ||||
|  | ||||
|     for (Index i = 0; i < n; ++i) { | ||||
|       if(FFTDir == FFT_FORWARD) { | ||||
|         a[i] = data[i] * numext::conj(pos_j_base_powered[i]); | ||||
|       } | ||||
|       else { | ||||
|         a[i] = data[i] * pos_j_base_powered[i]; | ||||
|       } | ||||
|     } | ||||
|     for (Index i = n; i < m; ++i) { | ||||
|       a[i] = ComplexScalar(0, 0); | ||||
|     } | ||||
|  | ||||
|     for (Index i = 0; i < n; ++i) { | ||||
|       if(FFTDir == FFT_FORWARD) { | ||||
|         b[i] = pos_j_base_powered[i]; | ||||
|       } | ||||
|       else { | ||||
|         b[i] = numext::conj(pos_j_base_powered[i]); | ||||
|       } | ||||
|     } | ||||
|     for (Index i = n; i < m - n; ++i) { | ||||
|       b[i] = ComplexScalar(0, 0); | ||||
|     } | ||||
|     for (Index i = m - n; i < m; ++i) { | ||||
|       if(FFTDir == FFT_FORWARD) { | ||||
|         b[i] = pos_j_base_powered[m-i]; | ||||
|       } | ||||
|       else { | ||||
|         b[i] = numext::conj(pos_j_base_powered[m-i]); | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     scramble_FFT(a, m); | ||||
|     compute_1D_Butterfly<FFT_FORWARD>(a, m, log_len); | ||||
|  | ||||
|     scramble_FFT(b, m); | ||||
|     compute_1D_Butterfly<FFT_FORWARD>(b, m, log_len); | ||||
|  | ||||
|     for (Index i = 0; i < m; ++i) { | ||||
|       a[i] *= b[i]; | ||||
|     } | ||||
|  | ||||
|     scramble_FFT(a, m); | ||||
|     compute_1D_Butterfly<FFT_REVERSE>(a, m, log_len); | ||||
|  | ||||
|     //Do the scaling after ifft | ||||
|     for (Index i = 0; i < m; ++i) { | ||||
|       a[i] /= m; | ||||
|     } | ||||
|  | ||||
|     for (Index i = 0; i < n; ++i) { | ||||
|       if(FFTDir == FFT_FORWARD) { | ||||
|         data[i] = a[i] * numext::conj(pos_j_base_powered[i]); | ||||
|       } | ||||
|       else { | ||||
|         data[i] = a[i] * pos_j_base_powered[i]; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
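
For reference, the identity behind processDataLineBluestein above: with t_n = exp(i\pi n^2/N) (the pos_j_base_powered table) and N = line_len, the length-N DFT becomes a linear convolution, which the two padded power-of-two FFTs of length m >= 2N - 1 evaluate:

\[
nk = \tfrac{1}{2}\big(n^2 + k^2 - (k-n)^2\big)
\;\Longrightarrow\;
X_k = \sum_{n=0}^{N-1} x_n\, e^{-2\pi i nk/N}
    = \overline{t_k}\,\sum_{n=0}^{N-1} \big(x_n\,\overline{t_n}\big)\, t_{k-n}.
\]

This is why the forward path multiplies by the conjugated chirp before and after the convolution while the reverse path uses the unconjugated one. The recurrence used earlier to fill the table, t_n = t_{n-1}^2 / t_{n-2} * t_1^2, follows from n^2 - 2(n-1)^2 + (n-2)^2 = 2.
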
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void scramble_FFT(ComplexScalar* data, Index n) { | ||||
|     eigen_assert(isPowerOfTwo(n)); | ||||
|     Index j = 1; | ||||
|     for (Index i = 1; i < n; ++i){ | ||||
|       if (j > i) { | ||||
|         std::swap(data[j-1], data[i-1]); | ||||
|       } | ||||
|       Index m = n >> 1; | ||||
|       while (m >= 2 && j > m) { | ||||
|         j -= m; | ||||
|         m >>= 1; | ||||
|       } | ||||
|       j += m; | ||||
|     } | ||||
|   } | ||||
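
scramble_FFT above performs an in-place bit-reversal permutation (the j bookkeeping is the classic in-place formulation). A minimal standalone sketch of the same permutation, useful for checking it against the loop above; the helper name is illustrative and not part of Eigen:

    #include <cstddef>
    #include <vector>

    // Returns perm such that perm[i] is the bit-reversed value of i over log2(n) bits,
    // assuming n is a power of two. scramble_FFT swaps data[i] and data[perm[i]] in place.
    std::vector<std::size_t> bit_reversal_permutation(std::size_t n) {
      std::size_t bits = 0;
      while ((std::size_t(1) << bits) < n) ++bits;
      std::vector<std::size_t> perm(n);
      for (std::size_t i = 0; i < n; ++i) {
        std::size_t r = 0;
        for (std::size_t b = 0; b < bits; ++b) {
          if (i & (std::size_t(1) << b)) r |= std::size_t(1) << (bits - 1 - b);
        }
        perm[i] = r;  // e.g. n = 8 gives perm = {0, 4, 2, 6, 1, 5, 3, 7}
      }
      return perm;
    }
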
|  | ||||
|   template <int Dir> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_2(ComplexScalar* data) { | ||||
|     ComplexScalar tmp = data[1]; | ||||
|     data[1] = data[0] - data[1]; | ||||
|     data[0] += tmp; | ||||
|   } | ||||
|  | ||||
|   template <int Dir> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_4(ComplexScalar* data) { | ||||
|     ComplexScalar tmp[4]; | ||||
|     tmp[0] = data[0] + data[1]; | ||||
|     tmp[1] = data[0] - data[1]; | ||||
|     tmp[2] = data[2] + data[3]; | ||||
|     if (Dir == FFT_FORWARD) { | ||||
|       tmp[3] = ComplexScalar(0.0, -1.0) * (data[2] - data[3]); | ||||
|     } else { | ||||
|       tmp[3] = ComplexScalar(0.0, 1.0) * (data[2] - data[3]); | ||||
|     } | ||||
|     data[0] = tmp[0] + tmp[2]; | ||||
|     data[1] = tmp[1] + tmp[3]; | ||||
|     data[2] = tmp[0] - tmp[2]; | ||||
|     data[3] = tmp[1] - tmp[3]; | ||||
|   } | ||||
|  | ||||
|   template <int Dir> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_8(ComplexScalar* data) { | ||||
|     ComplexScalar tmp_1[8]; | ||||
|     ComplexScalar tmp_2[8]; | ||||
|  | ||||
|     tmp_1[0] = data[0] + data[1]; | ||||
|     tmp_1[1] = data[0] - data[1]; | ||||
|     tmp_1[2] = data[2] + data[3]; | ||||
|     if (Dir == FFT_FORWARD) { | ||||
|       tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, -1); | ||||
|     } else { | ||||
|       tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, 1); | ||||
|     } | ||||
|     tmp_1[4] = data[4] + data[5]; | ||||
|     tmp_1[5] = data[4] - data[5]; | ||||
|     tmp_1[6] = data[6] + data[7]; | ||||
|     if (Dir == FFT_FORWARD) { | ||||
|       tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, -1); | ||||
|     } else { | ||||
|       tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, 1); | ||||
|     } | ||||
|     tmp_2[0] = tmp_1[0] + tmp_1[2]; | ||||
|     tmp_2[1] = tmp_1[1] + tmp_1[3]; | ||||
|     tmp_2[2] = tmp_1[0] - tmp_1[2]; | ||||
|     tmp_2[3] = tmp_1[1] - tmp_1[3]; | ||||
|     tmp_2[4] = tmp_1[4] + tmp_1[6]; | ||||
| // SQRT2DIV2 = sqrt(2)/2 | ||||
| #define SQRT2DIV2 0.7071067811865476 | ||||
|     if (Dir == FFT_FORWARD) { | ||||
|       tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, -SQRT2DIV2); | ||||
|       tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, -1); | ||||
|       tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, -SQRT2DIV2); | ||||
|     } else { | ||||
|       tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, SQRT2DIV2); | ||||
|       tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, 1); | ||||
|       tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, SQRT2DIV2); | ||||
|     } | ||||
|     data[0] = tmp_2[0] + tmp_2[4]; | ||||
|     data[1] = tmp_2[1] + tmp_2[5]; | ||||
|     data[2] = tmp_2[2] + tmp_2[6]; | ||||
|     data[3] = tmp_2[3] + tmp_2[7]; | ||||
|     data[4] = tmp_2[0] - tmp_2[4]; | ||||
|     data[5] = tmp_2[1] - tmp_2[5]; | ||||
|     data[6] = tmp_2[2] - tmp_2[6]; | ||||
|     data[7] = tmp_2[3] - tmp_2[7]; | ||||
|   } | ||||
|  | ||||
|   template <int Dir> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_1D_merge( | ||||
|       ComplexScalar* data, Index n, Index n_power_of_2) { | ||||
|     // Original code: | ||||
|     // RealScalar wtemp = std::sin(M_PI/n); | ||||
|     // RealScalar wpi =  -std::sin(2 * M_PI/n); | ||||
|     const RealScalar wtemp = m_sin_PI_div_n_LUT[n_power_of_2]; | ||||
|     const RealScalar wpi = (Dir == FFT_FORWARD) | ||||
|                                ? m_minus_sin_2_PI_div_n_LUT[n_power_of_2] | ||||
|                                : -m_minus_sin_2_PI_div_n_LUT[n_power_of_2]; | ||||
|  | ||||
|     const ComplexScalar wp(wtemp, wpi); | ||||
|     const ComplexScalar wp_one = wp + ComplexScalar(1, 0); | ||||
|     const ComplexScalar wp_one_2 = wp_one * wp_one; | ||||
|     const ComplexScalar wp_one_3 = wp_one_2 * wp_one; | ||||
|     const ComplexScalar wp_one_4 = wp_one_3 * wp_one; | ||||
|     const Index n2 = n / 2; | ||||
|     ComplexScalar w(1.0, 0.0); | ||||
|     for (Index i = 0; i < n2; i += 4) { | ||||
|        ComplexScalar temp0(data[i + n2] * w); | ||||
|        ComplexScalar temp1(data[i + 1 + n2] * w * wp_one); | ||||
|        ComplexScalar temp2(data[i + 2 + n2] * w * wp_one_2); | ||||
|        ComplexScalar temp3(data[i + 3 + n2] * w * wp_one_3); | ||||
|        w = w * wp_one_4; | ||||
|  | ||||
|        data[i + n2] = data[i] - temp0; | ||||
|        data[i] += temp0; | ||||
|  | ||||
|        data[i + 1 + n2] = data[i + 1] - temp1; | ||||
|        data[i + 1] += temp1; | ||||
|  | ||||
|        data[i + 2 + n2] = data[i + 2] - temp2; | ||||
|        data[i + 2] += temp2; | ||||
|  | ||||
|        data[i + 3 + n2] = data[i + 3] - temp3; | ||||
|        data[i + 3] += temp3; | ||||
|     } | ||||
|   } | ||||
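
The wtemp and wpi values consumed above come from the two lookup tables defined at the end of this evaluator (m_sin_PI_div_n_LUT and m_minus_sin_2_PI_div_n_LUT). The reason adding 1 to wp yields a unit-magnitude twiddle factor is the standard half-angle identity:

\[
\mathrm{wtemp} = -2\sin^2\!\big(\tfrac{\pi}{n}\big) = \cos\!\big(\tfrac{2\pi}{n}\big) - 1,
\qquad
\mathrm{wpi} = \mp\sin\!\big(\tfrac{2\pi}{n}\big),
\]
\[
\mathrm{wp\_one} = 1 + \mathrm{wp}
  = \cos\!\big(\tfrac{2\pi}{n}\big) \mp i\,\sin\!\big(\tfrac{2\pi}{n}\big)
  = e^{\mp 2\pi i/n},
\]

with the upper (minus) sign for FFT_FORWARD and the lower one for the inverse; each loop iteration handles four butterflies and therefore advances w by wp_one^4 = e^{\mp 8\pi i/n}.
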
|  | ||||
|   template <int Dir> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void compute_1D_Butterfly( | ||||
|       ComplexScalar* data, Index n, Index n_power_of_2) { | ||||
|     eigen_assert(isPowerOfTwo(n)); | ||||
|     if (n > 8) { | ||||
|       compute_1D_Butterfly<Dir>(data, n / 2, n_power_of_2 - 1); | ||||
|       compute_1D_Butterfly<Dir>(data + n / 2, n / 2, n_power_of_2 - 1); | ||||
|       butterfly_1D_merge<Dir>(data, n, n_power_of_2); | ||||
|     } else if (n == 8) { | ||||
|       butterfly_8<Dir>(data); | ||||
|     } else if (n == 4) { | ||||
|       butterfly_4<Dir>(data); | ||||
|     } else if (n == 2) { | ||||
|       butterfly_2<Dir>(data); | ||||
|     } | ||||
|   } | ||||
|  | ||||
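|   // Maps the flattened index of a "line" (an index over every dimension except | ||||
|   // omitted_dim) to the linear offset of that line's first element in the buffer; | ||||
|   // getIndexFromOffset() then steps along omitted_dim from that base. | ||||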
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getBaseOffsetFromIndex(Index index, Index omitted_dim) const { | ||||
|     Index result = 0; | ||||
|  | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       for (int i = NumDims - 1; i > omitted_dim; --i) { | ||||
|         const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim]; | ||||
|         const Index idx = index / partial_m_stride; | ||||
|         index -= idx * partial_m_stride; | ||||
|         result += idx * m_strides[i]; | ||||
|       } | ||||
|       result += index; | ||||
|     } | ||||
|     else { | ||||
|       for (Index i = 0; i < omitted_dim; ++i) { | ||||
|         const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim]; | ||||
|         const Index idx = index / partial_m_stride; | ||||
|         index -= idx * partial_m_stride; | ||||
|         result += idx * m_strides[i]; | ||||
|       } | ||||
|       result += index; | ||||
|     } | ||||
|     // The coordinate along omitted_dim is deliberately excluded here; its offset is added later via getIndexFromOffset | ||||
|     return result; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getIndexFromOffset(Index base, Index omitted_dim, Index offset) const { | ||||
|     Index result = base + offset * m_strides[omitted_dim] ; | ||||
|     return result; | ||||
|   } | ||||
|  | ||||
|  protected: | ||||
|   Index m_size; | ||||
|   const FFT& m_fft; | ||||
|   Dimensions m_dimensions; | ||||
|   array<Index, NumDims> m_strides; | ||||
|   TensorEvaluator<ArgType, Device> m_impl; | ||||
|   CoeffReturnType* m_data; | ||||
|   const Device& m_device; | ||||
|  | ||||
|   // This will support a maximum FFT size of 2^32 for each dimension | ||||
|   // m_sin_PI_div_n_LUT[i] = -2 * std::pow(std::sin(M_PI / std::pow(2, i)), 2); | ||||
|   const RealScalar m_sin_PI_div_n_LUT[32] = { | ||||
|     RealScalar(0.0), | ||||
|     RealScalar(-2), | ||||
|     RealScalar(-0.999999999999999), | ||||
|     RealScalar(-0.292893218813453), | ||||
|     RealScalar(-0.0761204674887130), | ||||
|     RealScalar(-0.0192147195967696), | ||||
|     RealScalar(-0.00481527332780311), | ||||
|     RealScalar(-0.00120454379482761), | ||||
|     RealScalar(-3.01181303795779e-04), | ||||
|     RealScalar(-7.52981608554592e-05), | ||||
|     RealScalar(-1.88247173988574e-05), | ||||
|     RealScalar(-4.70619042382852e-06), | ||||
|     RealScalar(-1.17654829809007e-06), | ||||
|     RealScalar(-2.94137117780840e-07), | ||||
|     RealScalar(-7.35342821488550e-08), | ||||
|     RealScalar(-1.83835707061916e-08), | ||||
|     RealScalar(-4.59589268710903e-09), | ||||
|     RealScalar(-1.14897317243732e-09), | ||||
|     RealScalar(-2.87243293150586e-10), | ||||
|     RealScalar(-7.18108232902250e-11), | ||||
|     RealScalar(-1.79527058227174e-11), | ||||
|     RealScalar(-4.48817645568941e-12), | ||||
|     RealScalar(-1.12204411392298e-12), | ||||
|     RealScalar(-2.80511028480785e-13), | ||||
|     RealScalar(-7.01277571201985e-14), | ||||
|     RealScalar(-1.75319392800498e-14), | ||||
|     RealScalar(-4.38298482001247e-15), | ||||
|     RealScalar(-1.09574620500312e-15), | ||||
|     RealScalar(-2.73936551250781e-16), | ||||
|     RealScalar(-6.84841378126949e-17), | ||||
|     RealScalar(-1.71210344531737e-17), | ||||
|     RealScalar(-4.28025861329343e-18) | ||||
|   }; | ||||
|  | ||||
|   // m_minus_sin_2_PI_div_n_LUT[i] = -std::sin(2 * M_PI / std::pow(2,i)); | ||||
|   const RealScalar m_minus_sin_2_PI_div_n_LUT[32] = { | ||||
|     RealScalar(0.0), | ||||
|     RealScalar(0.0), | ||||
|     RealScalar(-1.00000000000000e+00), | ||||
|     RealScalar(-7.07106781186547e-01), | ||||
|     RealScalar(-3.82683432365090e-01), | ||||
|     RealScalar(-1.95090322016128e-01), | ||||
|     RealScalar(-9.80171403295606e-02), | ||||
|     RealScalar(-4.90676743274180e-02), | ||||
|     RealScalar(-2.45412285229123e-02), | ||||
|     RealScalar(-1.22715382857199e-02), | ||||
|     RealScalar(-6.13588464915448e-03), | ||||
|     RealScalar(-3.06795676296598e-03), | ||||
|     RealScalar(-1.53398018628477e-03), | ||||
|     RealScalar(-7.66990318742704e-04), | ||||
|     RealScalar(-3.83495187571396e-04), | ||||
|     RealScalar(-1.91747597310703e-04), | ||||
|     RealScalar(-9.58737990959773e-05), | ||||
|     RealScalar(-4.79368996030669e-05), | ||||
|     RealScalar(-2.39684498084182e-05), | ||||
|     RealScalar(-1.19842249050697e-05), | ||||
|     RealScalar(-5.99211245264243e-06), | ||||
|     RealScalar(-2.99605622633466e-06), | ||||
|     RealScalar(-1.49802811316901e-06), | ||||
|     RealScalar(-7.49014056584716e-07), | ||||
|     RealScalar(-3.74507028292384e-07), | ||||
|     RealScalar(-1.87253514146195e-07), | ||||
|     RealScalar(-9.36267570730981e-08), | ||||
|     RealScalar(-4.68133785365491e-08), | ||||
|     RealScalar(-2.34066892682746e-08), | ||||
|     RealScalar(-1.17033446341373e-08), | ||||
|     RealScalar(-5.85167231706864e-09), | ||||
|     RealScalar(-2.92583615853432e-09) | ||||
|   }; | ||||
| }; | ||||
|  | ||||
| }  // end namespace Eigen | ||||
|  | ||||
| #endif  // EIGEN_HAS_CONSTEXPR | ||||
|  | ||||
|  | ||||
| #endif  // EIGEN_CXX11_TENSOR_TENSOR_FFT_H | ||||
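
For orientation, a minimal sketch of how the operator defined in this header is reached from user code, assuming the fft() entry point that TensorBase exposes in this module; the tensor shapes, dimension choice, and function name are illustrative:

    #include <complex>
    #include <unsupported/Eigen/CXX11/Tensor>

    void fft_usage_sketch() {
      Eigen::Tensor<float, 2> input(4, 8);
      input.setRandom();

      // Transform along dimension 1 only; keep both the real and imaginary parts.
      Eigen::array<int, 1> fft_dims{{1}};
      Eigen::Tensor<std::complex<float>, 2> spectrum =
          input.fft<Eigen::BothParts, Eigen::FFT_FORWARD>(fft_dims);
    }
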
							
								
								
									
389  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h  vendored  Normal file
							| @@ -0,0 +1,389 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_FIXED_SIZE_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_FIXED_SIZE_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \class TensorFixedSize | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief The fixed sized version of the tensor class. | ||||
|   * | ||||
|   * The fixed sized equivalent of | ||||
|   * Eigen::Tensor<float, 3> t(3, 5, 7); | ||||
|   * is | ||||
|   * Eigen::TensorFixedSize<float, Sizes<3,5,7>> t; | ||||
|   */ | ||||
|  | ||||
| template<typename Scalar_, typename Dimensions_, int Options_, typename IndexType> | ||||
| class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_, Options_, IndexType> > | ||||
| { | ||||
|   public: | ||||
|     typedef TensorFixedSize<Scalar_, Dimensions_, Options_, IndexType> Self; | ||||
|     typedef TensorBase<TensorFixedSize<Scalar_, Dimensions_, Options_, IndexType> > Base; | ||||
|     typedef typename Eigen::internal::nested<Self>::type Nested; | ||||
|     typedef typename internal::traits<Self>::StorageKind StorageKind; | ||||
|     typedef typename internal::traits<Self>::Index Index; | ||||
|     typedef Scalar_ Scalar; | ||||
|     typedef typename NumTraits<Scalar>::Real RealScalar; | ||||
|     typedef typename Base::CoeffReturnType CoeffReturnType; | ||||
|  | ||||
|     static const int Options = Options_; | ||||
|  | ||||
|     enum { | ||||
|       IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0), | ||||
|       Layout = Options_ & RowMajor ? RowMajor : ColMajor, | ||||
|       CoordAccess = true, | ||||
|       RawAccess = true | ||||
|     }; | ||||
|  | ||||
|   typedef Dimensions_ Dimensions; | ||||
|   static const std::size_t NumIndices = Dimensions::count; | ||||
|  | ||||
|   protected: | ||||
|   TensorStorage<Scalar, Dimensions, Options> m_storage; | ||||
|  | ||||
|   public: | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index                    rank()                   const { return NumIndices; } | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index                    dimension(std::size_t n) const { return m_storage.dimensions()[n]; } | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions&        dimensions()             const { return m_storage.dimensions(); } | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index                    size()                   const { return m_storage.size(); } | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar                   *data()                        { return m_storage.data(); } | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar             *data()                  const { return m_storage.data(); } | ||||
|  | ||||
|     // This makes EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED | ||||
|     // work, because that uses base().coeffRef() - and we don't yet | ||||
|     // implement a similar class hierarchy | ||||
|     inline Self& base()             { return *this; } | ||||
|     inline const Self& base() const { return *this; } | ||||
|  | ||||
| #if EIGEN_HAS_VARIADIC_TEMPLATES | ||||
|     template<typename... IndexTypes> | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index firstIndex, IndexTypes... otherIndices) const | ||||
|     { | ||||
|       // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. | ||||
|       EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|       return coeff(array<Index, NumIndices>{{firstIndex, otherIndices...}}); | ||||
|     } | ||||
| #endif | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar& coeff(const array<Index, NumIndices>& indices) const | ||||
|     { | ||||
|       eigen_internal_assert(checkIndexRange(indices)); | ||||
|       return m_storage.data()[linearizedIndex(indices)]; | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const | ||||
|     { | ||||
|       eigen_internal_assert(index >= 0 && index < size()); | ||||
|       return m_storage.data()[index]; | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar& coeff() const | ||||
|     { | ||||
|       EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|       return m_storage.data()[0]; | ||||
|     } | ||||
|  | ||||
|  | ||||
| #if EIGEN_HAS_VARIADIC_TEMPLATES | ||||
|     template<typename... IndexTypes> | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index firstIndex, IndexTypes... otherIndices) | ||||
|     { | ||||
|       // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. | ||||
|       EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|       return coeffRef(array<Index, NumIndices>{{firstIndex, otherIndices...}}); | ||||
|     } | ||||
| #endif | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Scalar& coeffRef(const array<Index, NumIndices>& indices) | ||||
|     { | ||||
|       eigen_internal_assert(checkIndexRange(indices)); | ||||
|       return m_storage.data()[linearizedIndex(indices)]; | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) | ||||
|     { | ||||
|       eigen_internal_assert(index >= 0 && index < size()); | ||||
|       return m_storage.data()[index]; | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Scalar& coeffRef() | ||||
|     { | ||||
|       EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|       return m_storage.data()[0]; | ||||
|     } | ||||
|  | ||||
| #if EIGEN_HAS_VARIADIC_TEMPLATES | ||||
|     template<typename... IndexTypes> | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) const | ||||
|     { | ||||
|       // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. | ||||
|       EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|       return this->operator()(array<Index, NumIndices>{{firstIndex, otherIndices...}}); | ||||
|     } | ||||
| #else | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const | ||||
|     { | ||||
|       if (Options&RowMajor) { | ||||
|         const Index index = i1 + i0 * m_storage.dimensions()[1]; | ||||
|         return m_storage.data()[index]; | ||||
|       } else { | ||||
|         const Index index = i0 + i1 * m_storage.dimensions()[0]; | ||||
|         return m_storage.data()[index]; | ||||
|       } | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const | ||||
|     { | ||||
|       if (Options&RowMajor) { | ||||
|         const Index index = i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0); | ||||
|         return m_storage.data()[index]; | ||||
|       } else { | ||||
|         const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * i2); | ||||
|         return m_storage.data()[index]; | ||||
|       } | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3) const | ||||
|     { | ||||
|       if (Options&RowMajor) { | ||||
|         const Index index = i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0)); | ||||
|         return m_storage.data()[index]; | ||||
|       } else { | ||||
|         const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * i3)); | ||||
|         return m_storage.data()[index]; | ||||
|       } | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const | ||||
|     { | ||||
|       if (Options&RowMajor) { | ||||
|         const Index index = i4 + m_storage.dimensions()[4] * (i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0))); | ||||
|         return m_storage.data()[index]; | ||||
|       } else { | ||||
|         const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * (i3 + m_storage.dimensions()[3] * i4))); | ||||
|         return m_storage.data()[index]; | ||||
|       } | ||||
|     } | ||||
| #endif | ||||
|  | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar& operator()(const array<Index, NumIndices>& indices) const | ||||
|     { | ||||
|       eigen_assert(checkIndexRange(indices)); | ||||
|       return coeff(indices); | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar& operator()(Index index) const | ||||
|     { | ||||
|       eigen_internal_assert(index >= 0 && index < size()); | ||||
|       return coeff(index); | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar& operator()() const | ||||
|     { | ||||
|       EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|       return coeff(); | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar& operator[](Index index) const | ||||
|     { | ||||
|       // The bracket operator is only for vectors, use the parenthesis operator instead. | ||||
|       EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|       return coeff(index); | ||||
|     } | ||||
|  | ||||
| #if EIGEN_HAS_VARIADIC_TEMPLATES | ||||
|     template<typename... IndexTypes> | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) | ||||
|     { | ||||
|       // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. | ||||
|       EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|       return operator()(array<Index, NumIndices>{{firstIndex, otherIndices...}}); | ||||
|     } | ||||
| #else | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1) | ||||
|     { | ||||
|       if (Options&RowMajor) { | ||||
|         const Index index = i1 + i0 * m_storage.dimensions()[1]; | ||||
|         return m_storage.data()[index]; | ||||
|       } else { | ||||
|         const Index index = i0 + i1 * m_storage.dimensions()[0]; | ||||
|         return m_storage.data()[index]; | ||||
|       } | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2) | ||||
|     { | ||||
|       if (Options&RowMajor) { | ||||
|         const Index index = i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0); | ||||
|         return m_storage.data()[index]; | ||||
|       } else { | ||||
|         const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * i2); | ||||
|         return m_storage.data()[index]; | ||||
|       } | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3) | ||||
|     { | ||||
|       if (Options&RowMajor) { | ||||
|         const Index index = i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0)); | ||||
|         return m_storage.data()[index]; | ||||
|       } else { | ||||
|         const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * i3)); | ||||
|         return m_storage.data()[index]; | ||||
|       } | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) | ||||
|     { | ||||
|       if (Options&RowMajor) { | ||||
|         const Index index = i4 + m_storage.dimensions()[4] * (i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0))); | ||||
|         return m_storage.data()[index]; | ||||
|       } else { | ||||
|         const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * (i3 + m_storage.dimensions()[3] * i4))); | ||||
|         return m_storage.data()[index]; | ||||
|       } | ||||
|     } | ||||
| #endif | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Scalar& operator()(const array<Index, NumIndices>& indices) | ||||
|     { | ||||
|       eigen_assert(checkIndexRange(indices)); | ||||
|       return coeffRef(indices); | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Scalar& operator()(Index index) | ||||
|     { | ||||
|       eigen_assert(index >= 0 && index < size()); | ||||
|       return coeffRef(index); | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Scalar& operator()() | ||||
|     { | ||||
|       EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|       return coeffRef(); | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Scalar& operator[](Index index) | ||||
|     { | ||||
|       // The bracket operator is only for vectors, use the parenthesis operator instead | ||||
|       EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|       return coeffRef(index); | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE TensorFixedSize() | ||||
|       : m_storage() | ||||
|     { | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE TensorFixedSize(const Self& other) | ||||
|       : m_storage(other.m_storage) | ||||
|     { | ||||
|     } | ||||
|  | ||||
| #if EIGEN_HAS_RVALUE_REFERENCES | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorFixedSize(Self&& other) | ||||
|       : m_storage(other.m_storage) | ||||
|     { | ||||
|     } | ||||
| #endif | ||||
|  | ||||
|     template<typename OtherDerived> | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE TensorFixedSize(const TensorBase<OtherDerived, ReadOnlyAccessors>& other) | ||||
|     { | ||||
|       typedef TensorAssignOp<TensorFixedSize, const OtherDerived> Assign; | ||||
|       Assign assign(*this, other.derived()); | ||||
|       internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); | ||||
|     } | ||||
|     template<typename OtherDerived> | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE TensorFixedSize(const TensorBase<OtherDerived, WriteAccessors>& other) | ||||
|     { | ||||
|       typedef TensorAssignOp<TensorFixedSize, const OtherDerived> Assign; | ||||
|       Assign assign(*this, other.derived()); | ||||
|       internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE TensorFixedSize& operator=(const TensorFixedSize& other) | ||||
|     { | ||||
|       // FIXME: check that the dimensions of other match the dimensions of *this. | ||||
|       // Unfortunately this isn't possible yet when the rhs is an expression. | ||||
|       typedef TensorAssignOp<Self, const TensorFixedSize> Assign; | ||||
|       Assign assign(*this, other); | ||||
|       internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); | ||||
|       return *this; | ||||
|     } | ||||
|     template<typename OtherDerived> | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE TensorFixedSize& operator=(const OtherDerived& other) | ||||
|     { | ||||
|       // FIXME: check that the dimensions of other match the dimensions of *this. | ||||
|       // Unfortunately this isn't possible yet when the rhs is an expression. | ||||
|       typedef TensorAssignOp<Self, const OtherDerived> Assign; | ||||
|       Assign assign(*this, other); | ||||
|       internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); | ||||
|       return *this; | ||||
|     } | ||||
|  | ||||
|   protected: | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE bool checkIndexRange(const array<Index, NumIndices>& /*indices*/) const | ||||
|     { | ||||
|       using internal::array_apply_and_reduce; | ||||
|       using internal::array_zip_and_reduce; | ||||
|       using internal::greater_equal_zero_op; | ||||
|       using internal::logical_and_op; | ||||
|       using internal::lesser_op; | ||||
|  | ||||
|       return true; | ||||
|         // check whether the indices are all >= 0 | ||||
|           /*       array_apply_and_reduce<logical_and_op, greater_equal_zero_op>(indices) && | ||||
|         // check whether the indices fit in the dimensions | ||||
|         array_zip_and_reduce<logical_and_op, lesser_op>(indices, m_storage.dimensions());*/ | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Index linearizedIndex(const array<Index, NumIndices>& indices) const | ||||
|     { | ||||
|       if (Options&RowMajor) { | ||||
|         return m_storage.dimensions().IndexOfRowMajor(indices); | ||||
|       } else { | ||||
|         return m_storage.dimensions().IndexOfColMajor(indices); | ||||
|       } | ||||
|     } | ||||
| }; | ||||
|  | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_FIXED_SIZE_H | ||||
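
A small usage sketch for the class above: the dimensions are compile-time constants supplied through Eigen::Sizes<>, so the storage is a plain in-object array rather than a heap allocation (the function name is illustrative):

    #include <unsupported/Eigen/CXX11/Tensor>

    void fixed_size_usage_sketch() {
      // A rank-3 tensor whose 3x5x7 shape is fixed at compile time.
      Eigen::TensorFixedSize<float, Eigen::Sizes<3, 5, 7>> t;
      t.setZero();
      t(1, 2, 3) = 4.0f;                    // one index per dimension, as for Tensor
      float sum = 0.0f;
      for (int i = 0; i < int(t.size()); ++i) {
        sum += t.data()[i];                 // RawAccess: contiguous storage
      }
    }
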
							
								
								
									
169  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h  vendored  Normal file
							| @@ -0,0 +1,169 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| namespace internal { | ||||
| template<typename XprType, template <class> class MakePointer_> | ||||
| struct traits<TensorForcedEvalOp<XprType, MakePointer_> > | ||||
| { | ||||
|   // Type promotion to handle the case where the types of the lhs and the rhs are different. | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef traits<XprType> XprTraits; | ||||
|   typedef typename traits<XprType>::StorageKind StorageKind; | ||||
|   typedef typename traits<XprType>::Index Index; | ||||
|   typedef typename XprType::Nested Nested; | ||||
|   typedef typename remove_reference<Nested>::type _Nested; | ||||
|   static const int NumDimensions = XprTraits::NumDimensions; | ||||
|   static const int Layout = XprTraits::Layout; | ||||
|  | ||||
|   enum { | ||||
|     Flags = 0 | ||||
|   }; | ||||
|   template <class T> struct MakePointer { | ||||
|     // Intermediate typedef to workaround MSVC issue. | ||||
|     typedef MakePointer_<T> MakePointerT; | ||||
|     typedef typename MakePointerT::Type Type; | ||||
|   }; | ||||
| }; | ||||
|  | ||||
| template<typename XprType, template <class> class MakePointer_> | ||||
| struct eval<TensorForcedEvalOp<XprType, MakePointer_>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorForcedEvalOp<XprType, MakePointer_>& type; | ||||
| }; | ||||
|  | ||||
| template<typename XprType, template <class> class MakePointer_> | ||||
| struct nested<TensorForcedEvalOp<XprType, MakePointer_>, 1, typename eval<TensorForcedEvalOp<XprType, MakePointer_> >::type> | ||||
| { | ||||
|   typedef TensorForcedEvalOp<XprType, MakePointer_> type; | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
|  | ||||
|  | ||||
| // FIXME use proper doxygen documentation (e.g. \tparam MakePointer_) | ||||
|  | ||||
| /** \class TensorForcedEvalOp | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Tensor forced evaluation class. | ||||
|   * | ||||
|   * | ||||
|   */ | ||||
| /// `template <class> class MakePointer_` is added to convert the host pointer to the device pointer. | ||||
| /// It is added due to the fact that for our device compiler `T*` is not allowed. | ||||
| /// If we wanted to use the same Evaluator functions we have to convert that type to our pointer `T`. | ||||
| /// This is done through our `MakePointer_` class. By default the Type in the `MakePointer_<T>` is `T*` . | ||||
| /// Therefore, by adding the default value, we managed to convert the type and it does not break any | ||||
| /// existing code as its default value is `T*`. | ||||
| template<typename XprType, template <class> class MakePointer_> | ||||
| class TensorForcedEvalOp : public TensorBase<TensorForcedEvalOp<XprType, MakePointer_>, ReadOnlyAccessors> | ||||
| { | ||||
|   public: | ||||
|   typedef typename Eigen::internal::traits<TensorForcedEvalOp>::Scalar Scalar; | ||||
|   typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; | ||||
|   typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; | ||||
|   typedef typename Eigen::internal::nested<TensorForcedEvalOp>::type Nested; | ||||
|   typedef typename Eigen::internal::traits<TensorForcedEvalOp>::StorageKind StorageKind; | ||||
|   typedef typename Eigen::internal::traits<TensorForcedEvalOp>::Index Index; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorForcedEvalOp(const XprType& expr) | ||||
|       : m_xpr(expr) {} | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const typename internal::remove_all<typename XprType::Nested>::type& | ||||
|     expression() const { return m_xpr; } | ||||
|  | ||||
|   protected: | ||||
|     typename XprType::Nested m_xpr; | ||||
| }; | ||||
|  | ||||
|  | ||||
| template<typename ArgType, typename Device, template <class> class MakePointer_> | ||||
| struct TensorEvaluator<const TensorForcedEvalOp<ArgType, MakePointer_>, Device> | ||||
| { | ||||
|   typedef TensorForcedEvalOp<ArgType, MakePointer_> XprType; | ||||
|   typedef typename ArgType::Scalar Scalar; | ||||
|   typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions; | ||||
|   typedef typename XprType::Index Index; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = true, | ||||
|     PacketAccess = (PacketSize > 1), | ||||
|     Layout = TensorEvaluator<ArgType, Device>::Layout, | ||||
|     RawAccess = true | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) | ||||
|       /// the expression is also stored in m_op; it is used for sycl | ||||
|       : m_impl(op.expression(), device), m_op(op.expression()), m_device(device), m_buffer(NULL) | ||||
|   { } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { | ||||
|     const Index numValues =  internal::array_prod(m_impl.dimensions()); | ||||
|     m_buffer = (CoeffReturnType*)m_device.allocate(numValues * sizeof(CoeffReturnType)); | ||||
|     // Initialize the memory in case we're dealing with non-POD types. | ||||
|     if (NumTraits<CoeffReturnType>::RequireInitialization) { | ||||
|       for (Index i = 0; i < numValues; ++i) { | ||||
|         new(m_buffer+i) CoeffReturnType(); | ||||
|       } | ||||
|     } | ||||
|     typedef TensorEvalToOp< const typename internal::remove_const<ArgType>::type > EvalTo; | ||||
|     EvalTo evalToTmp(m_buffer, m_op); | ||||
|     const bool PacketAccess = internal::IsVectorizable<Device, const ArgType>::value; | ||||
|     internal::TensorExecutor<const EvalTo, typename internal::remove_const<Device>::type, PacketAccess>::run(evalToTmp, m_device); | ||||
|     return true; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { | ||||
|     m_device.deallocate(m_buffer); | ||||
|     m_buffer = NULL; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const | ||||
|   { | ||||
|     return m_buffer[index]; | ||||
|   } | ||||
|  | ||||
|   template<int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const | ||||
|   { | ||||
|     return internal::ploadt<PacketReturnType, LoadMode>(m_buffer + index); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { | ||||
|     return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC typename MakePointer<Scalar>::Type data() const { return m_buffer; } | ||||
|  | ||||
|   /// required by sycl in order to extract the sycl accessor | ||||
|   const TensorEvaluator<ArgType, Device>& impl() { return m_impl; } | ||||
|   /// used by sycl in order to build the sycl buffer | ||||
|   const Device& device() const{return m_device;} | ||||
|  private: | ||||
|   TensorEvaluator<ArgType, Device> m_impl; | ||||
|   const ArgType m_op; | ||||
|   const Device& m_device; | ||||
|   typename MakePointer<CoeffReturnType>::Type m_buffer; | ||||
| }; | ||||
|  | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H | ||||
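
A sketch of where this node shows up in practice: calling eval() on a tensor expression (the TensorBase method that returns a TensorForcedEvalOp) materializes the sub-expression into a temporary buffer, which is exactly what the evaluator above allocates and fills. Shapes and the function name below are illustrative:

    #include <unsupported/Eigen/CXX11/Tensor>

    void forced_eval_usage_sketch() {
      Eigen::Tensor<float, 1> a(16), b(16);
      a.setRandom();
      b.setRandom();

      // Without eval(), (a + b) stays a lazy expression and would be recomputed by
      // every consumer; eval() forces it into a buffer that later ops read from.
      Eigen::Tensor<float, 1> c = (a + b).eval() + a;
    }
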
							
								
								
									
109  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h  vendored  Normal file
							| @@ -0,0 +1,109 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_FORWARD_DECLARATIONS_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_FORWARD_DECLARATIONS_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| // The MakePointer class is used as a container for the address space of the pointer | ||||
| // on the host and on the device. From the host side it generates the plain T* pointer, | ||||
| // and when EIGEN_USE_SYCL is used it constructs a buffer with a map_allocator over | ||||
| // T* m_data on the host. It is always called on the device. | ||||
| // Specialisation of MakePointer class for creating the sycl buffer with | ||||
| // map_allocator. | ||||
| template<typename T> struct MakePointer { | ||||
|   typedef T* Type; | ||||
| }; | ||||
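
In the default (non-SYCL) build this is just a type-level identity mapping T to a raw pointer; a quick compile-time check, written as illustrative user code that assumes the Tensor header has been included:

    #include <type_traits>
    #include <unsupported/Eigen/CXX11/Tensor>

    static_assert(std::is_same<Eigen::MakePointer<float>::Type, float*>::value,
                  "the default MakePointer maps T to T*");
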
|  | ||||
| template<typename PlainObjectType, int Options_ = Unaligned, template <class> class MakePointer_ = MakePointer> class TensorMap; | ||||
| template<typename Scalar_, int NumIndices_, int Options_ = 0, typename IndexType = DenseIndex> class Tensor; | ||||
| template<typename Scalar_, typename Dimensions, int Options_ = 0, typename IndexType = DenseIndex> class TensorFixedSize; | ||||
| template<typename PlainObjectType> class TensorRef; | ||||
| template<typename Derived, int AccessLevel> class TensorBase; | ||||
|  | ||||
| template<typename NullaryOp, typename PlainObjectType> class TensorCwiseNullaryOp; | ||||
| template<typename UnaryOp, typename XprType> class TensorCwiseUnaryOp; | ||||
| template<typename BinaryOp, typename LeftXprType, typename RightXprType> class TensorCwiseBinaryOp; | ||||
| template<typename TernaryOp, typename Arg1XprType, typename Arg2XprType, typename Arg3XprType> class TensorCwiseTernaryOp; | ||||
| template<typename IfXprType, typename ThenXprType, typename ElseXprType> class TensorSelectOp; | ||||
| template<typename Op, typename Dims, typename XprType, template <class> class MakePointer_ = MakePointer > class TensorReductionOp; | ||||
| template<typename XprType> class TensorIndexTupleOp; | ||||
| template<typename ReduceOp, typename Dims, typename XprType> class TensorTupleReducerOp; | ||||
| template<typename Axis, typename LeftXprType, typename RightXprType> class TensorConcatenationOp; | ||||
| template<typename Dimensions, typename LeftXprType, typename RightXprType> class TensorContractionOp; | ||||
| template<typename TargetType, typename XprType> class TensorConversionOp; | ||||
| template<typename Dimensions, typename InputXprType, typename KernelXprType> class TensorConvolutionOp; | ||||
| template<typename FFT, typename XprType, int FFTDataType, int FFTDirection> class TensorFFTOp; | ||||
| template<typename PatchDim, typename XprType> class TensorPatchOp; | ||||
| template<DenseIndex Rows, DenseIndex Cols, typename XprType> class TensorImagePatchOp; | ||||
| template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType> class TensorVolumePatchOp; | ||||
| template<typename Broadcast, typename XprType> class TensorBroadcastingOp; | ||||
| template<DenseIndex DimId, typename XprType> class TensorChippingOp; | ||||
| template<typename NewDimensions, typename XprType> class TensorReshapingOp; | ||||
| template<typename XprType> class TensorLayoutSwapOp; | ||||
| template<typename StartIndices, typename Sizes, typename XprType> class TensorSlicingOp; | ||||
| template<typename ReverseDimensions, typename XprType> class TensorReverseOp; | ||||
| template<typename PaddingDimensions, typename XprType> class TensorPaddingOp; | ||||
| template<typename Shuffle, typename XprType> class TensorShufflingOp; | ||||
| template<typename Strides, typename XprType> class TensorStridingOp; | ||||
| template<typename StartIndices, typename StopIndices, typename Strides, typename XprType> class TensorStridingSlicingOp; | ||||
| template<typename Strides, typename XprType> class TensorInflationOp; | ||||
| template<typename Generator, typename XprType> class TensorGeneratorOp; | ||||
| template<typename LeftXprType, typename RightXprType> class TensorAssignOp; | ||||
| template<typename Op, typename XprType> class TensorScanOp; | ||||
|  | ||||
| template<typename CustomUnaryFunc, typename XprType> class TensorCustomUnaryOp; | ||||
| template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> class TensorCustomBinaryOp; | ||||
|  | ||||
| template<typename XprType, template <class> class MakePointer_ = MakePointer> class TensorEvalToOp; | ||||
| template<typename XprType, template <class> class MakePointer_ = MakePointer> class TensorForcedEvalOp; | ||||
|  | ||||
| template<typename ExpressionType, typename DeviceType> class TensorDevice; | ||||
| template<typename Derived, typename Device> struct TensorEvaluator; | ||||
|  | ||||
| struct DefaultDevice; | ||||
| struct ThreadPoolDevice; | ||||
| struct GpuDevice; | ||||
| struct SyclDevice; | ||||
|  | ||||
| enum FFTResultType { | ||||
|   RealPart = 0, | ||||
|   ImagPart = 1, | ||||
|   BothParts = 2 | ||||
| }; | ||||
|  | ||||
| enum FFTDirection { | ||||
|     FFT_FORWARD = 0, | ||||
|     FFT_REVERSE = 1 | ||||
| }; | ||||
|  | ||||
|  | ||||
| namespace internal { | ||||
|  | ||||
| template <typename Device, typename Expression> | ||||
| struct IsVectorizable { | ||||
|   static const bool value = TensorEvaluator<Expression, Device>::PacketAccess; | ||||
| }; | ||||
|  | ||||
| template <typename Expression> | ||||
| struct IsVectorizable<GpuDevice, Expression> { | ||||
|   static const bool value = TensorEvaluator<Expression, GpuDevice>::PacketAccess && | ||||
|                             TensorEvaluator<Expression, GpuDevice>::IsAligned; | ||||
| }; | ||||
|  | ||||
| template <typename Expression, typename Device, | ||||
|           bool Vectorizable = IsVectorizable<Device, Expression>::value> | ||||
| class TensorExecutor; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
| }  // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_FORWARD_DECLARATIONS_H | ||||
							
								
								
									
489  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h  vendored  Normal file
							| @@ -0,0 +1,489 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H | ||||
|  | ||||
| namespace Eigen { | ||||
| namespace internal { | ||||
|  | ||||
|  | ||||
| /** \internal | ||||
|  * \brief Template functor to compute the modulo between an array and a scalar. | ||||
|  */ | ||||
| template <typename Scalar> | ||||
| struct scalar_mod_op { | ||||
|   EIGEN_DEVICE_FUNC scalar_mod_op(const Scalar& divisor) : m_divisor(divisor) {} | ||||
|   EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a % m_divisor; } | ||||
|   const Scalar m_divisor; | ||||
| }; | ||||
| template <typename Scalar> | ||||
| struct functor_traits<scalar_mod_op<Scalar> > | ||||
| { enum { Cost = scalar_div_cost<Scalar,false>::value, PacketAccess = false }; }; | ||||
|  | ||||
|  | ||||
| /** \internal | ||||
|  * \brief Template functor to compute the modulo between 2 arrays. | ||||
|  */ | ||||
| template <typename Scalar> | ||||
| struct scalar_mod2_op { | ||||
|   EIGEN_EMPTY_STRUCT_CTOR(scalar_mod2_op); | ||||
|   EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a, const Scalar& b) const { return a % b; } | ||||
| }; | ||||
| template <typename Scalar> | ||||
| struct functor_traits<scalar_mod2_op<Scalar> > | ||||
| { enum { Cost = scalar_div_cost<Scalar,false>::value, PacketAccess = false }; }; | ||||
|  | ||||
| template <typename Scalar> | ||||
| struct scalar_fmod_op { | ||||
|   EIGEN_EMPTY_STRUCT_CTOR(scalar_fmod_op); | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar | ||||
|   operator()(const Scalar& a, const Scalar& b) const { | ||||
|     return numext::fmod(a, b); | ||||
|   } | ||||
| }; | ||||
| template <typename Scalar> | ||||
| struct functor_traits<scalar_fmod_op<Scalar> > { | ||||
|   enum { Cost = 13,  // Reciprocal throughput of FPREM on Haswell. | ||||
|          PacketAccess = false }; | ||||
| }; | ||||
|  | ||||
|  | ||||
| /** \internal | ||||
|   * \brief Template functor to compute the sigmoid of a scalar | ||||
|   * \sa class CwiseUnaryOp, ArrayBase::sigmoid() | ||||
|   */ | ||||
| template <typename T> | ||||
| struct scalar_sigmoid_op { | ||||
|   EIGEN_EMPTY_STRUCT_CTOR(scalar_sigmoid_op) | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& x) const { | ||||
|     const T one = T(1); | ||||
|     return one / (one + numext::exp(-x)); | ||||
|   } | ||||
|  | ||||
|   template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   Packet packetOp(const Packet& x) const { | ||||
|     const Packet one = pset1<Packet>(T(1)); | ||||
|     return pdiv(one, padd(one, pexp(pnegate(x)))); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename T> | ||||
| struct functor_traits<scalar_sigmoid_op<T> > { | ||||
|   enum { | ||||
|     Cost = NumTraits<T>::AddCost * 2 + NumTraits<T>::MulCost * 6, | ||||
|     PacketAccess = packet_traits<T>::HasAdd && packet_traits<T>::HasDiv && | ||||
|                    packet_traits<T>::HasNegate && packet_traits<T>::HasExp | ||||
|   }; | ||||
| }; | ||||
|  | ||||
|  | ||||
| template<typename Reducer, typename Device> | ||||
| struct reducer_traits { | ||||
|   enum { | ||||
|     Cost = 1, | ||||
|     PacketAccess = false | ||||
|   }; | ||||
| }; | ||||
|  | ||||
| // Standard reduction functors | ||||
| template <typename T> struct SumReducer | ||||
| { | ||||
|   static const bool PacketAccess = packet_traits<T>::HasAdd; | ||||
|   static const bool IsStateful = false; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { | ||||
|     internal::scalar_sum_op<T> sum_op; | ||||
|     *accum = sum_op(*accum, t); | ||||
|   } | ||||
|   template <typename Packet> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { | ||||
|     (*accum) = padd<Packet>(*accum, p); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { | ||||
|     internal::scalar_cast_op<int, T> conv; | ||||
|     return conv(0); | ||||
|   } | ||||
|   template <typename Packet> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { | ||||
|     return pset1<Packet>(initialize()); | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { | ||||
|     return accum; | ||||
|   } | ||||
|   template <typename Packet> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const { | ||||
|     return vaccum; | ||||
|   } | ||||
|   template <typename Packet> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { | ||||
|     internal::scalar_sum_op<T> sum_op; | ||||
|     return sum_op(saccum, predux(vaccum)); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename T, typename Device> | ||||
| struct reducer_traits<SumReducer<T>, Device> { | ||||
|   enum { | ||||
|     Cost = NumTraits<T>::AddCost, | ||||
|     PacketAccess = PacketType<T, Device>::HasAdd | ||||
|   }; | ||||
| }; | ||||
|  | ||||
|  | ||||
| template <typename T> struct MeanReducer | ||||
| { | ||||
|   static const bool PacketAccess = packet_traits<T>::HasAdd && !NumTraits<T>::IsInteger; | ||||
|   static const bool IsStateful = true; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   MeanReducer() : scalarCount_(0), packetCount_(0) { } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) { | ||||
|     internal::scalar_sum_op<T> sum_op; | ||||
|     *accum = sum_op(*accum, t); | ||||
|     scalarCount_++; | ||||
|   } | ||||
|   template <typename Packet> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) { | ||||
|     (*accum) = padd<Packet>(*accum, p); | ||||
|     packetCount_++; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { | ||||
|     internal::scalar_cast_op<int, T> conv; | ||||
|     return conv(0); | ||||
|   } | ||||
|   template <typename Packet> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { | ||||
|     return pset1<Packet>(initialize()); | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { | ||||
|     return accum / scalarCount_; | ||||
|   } | ||||
|   template <typename Packet> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const { | ||||
|     return pdiv(vaccum, pset1<Packet>(packetCount_)); | ||||
|   } | ||||
|   template <typename Packet> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { | ||||
|     internal::scalar_sum_op<T> sum_op; | ||||
|     return sum_op(saccum, predux(vaccum)) / (scalarCount_ + packetCount_ * unpacket_traits<Packet>::size); | ||||
|   } | ||||
|  | ||||
|   protected: | ||||
|     DenseIndex scalarCount_; | ||||
|     DenseIndex packetCount_; | ||||
| }; | ||||
|  | ||||
| template <typename T, typename Device> | ||||
| struct reducer_traits<MeanReducer<T>, Device> { | ||||
|   enum { | ||||
|     Cost = NumTraits<T>::AddCost, | ||||
|     PacketAccess = PacketType<T, Device>::HasAdd | ||||
|   }; | ||||
| }; | ||||
|  | ||||
|  | ||||
| template <typename T, bool IsMax = true, bool IsInteger = true> | ||||
| struct MinMaxBottomValue { | ||||
|   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { | ||||
|     return Eigen::NumTraits<T>::lowest(); | ||||
|   } | ||||
| }; | ||||
| template <typename T> | ||||
| struct MinMaxBottomValue<T, true, false> { | ||||
|   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { | ||||
|     return -Eigen::NumTraits<T>::infinity(); | ||||
|   } | ||||
| }; | ||||
| template <typename T> | ||||
| struct MinMaxBottomValue<T, false, true> { | ||||
|   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { | ||||
|     return Eigen::NumTraits<T>::highest(); | ||||
|   } | ||||
| }; | ||||
| template <typename T> | ||||
| struct MinMaxBottomValue<T, false, false> { | ||||
|   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { | ||||
|     return Eigen::NumTraits<T>::infinity(); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| template <typename T> struct MaxReducer | ||||
| { | ||||
|   static const bool PacketAccess = packet_traits<T>::HasMax; | ||||
|   static const bool IsStateful = false; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { | ||||
|     if (t > *accum) { *accum = t; } | ||||
|   } | ||||
|   template <typename Packet> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { | ||||
|     (*accum) = pmax<Packet>(*accum, p); | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { | ||||
|     return MinMaxBottomValue<T, true, Eigen::NumTraits<T>::IsInteger>::bottom_value(); | ||||
|   } | ||||
|   template <typename Packet> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { | ||||
|     return pset1<Packet>(initialize()); | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { | ||||
|     return accum; | ||||
|   } | ||||
|   template <typename Packet> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const { | ||||
|     return vaccum; | ||||
|   } | ||||
|   template <typename Packet> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { | ||||
|     return numext::maxi(saccum, predux_max(vaccum)); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename T, typename Device> | ||||
| struct reducer_traits<MaxReducer<T>, Device> { | ||||
|   enum { | ||||
|     Cost = NumTraits<T>::AddCost, | ||||
|     PacketAccess = PacketType<T, Device>::HasMax | ||||
|   }; | ||||
| }; | ||||
|  | ||||
|  | ||||
| template <typename T> struct MinReducer | ||||
| { | ||||
|   static const bool PacketAccess = packet_traits<T>::HasMin; | ||||
|   static const bool IsStateful = false; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { | ||||
|     if (t < *accum) { *accum = t; } | ||||
|   } | ||||
|   template <typename Packet> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { | ||||
|     (*accum) = pmin<Packet>(*accum, p); | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { | ||||
|     return MinMaxBottomValue<T, false, Eigen::NumTraits<T>::IsInteger>::bottom_value(); | ||||
|   } | ||||
|   template <typename Packet> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { | ||||
|     return pset1<Packet>(initialize()); | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { | ||||
|     return accum; | ||||
|   } | ||||
|   template <typename Packet> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const { | ||||
|     return vaccum; | ||||
|   } | ||||
|   template <typename Packet> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { | ||||
|     return numext::mini(saccum, predux_min(vaccum)); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename T, typename Device> | ||||
| struct reducer_traits<MinReducer<T>, Device> { | ||||
|   enum { | ||||
|     Cost = NumTraits<T>::AddCost, | ||||
|     PacketAccess = PacketType<T, Device>::HasMin | ||||
|   }; | ||||
| }; | ||||
|  | ||||
|  | ||||
| template <typename T> struct ProdReducer | ||||
| { | ||||
|   static const bool PacketAccess = packet_traits<T>::HasMul; | ||||
|   static const bool IsStateful = false; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { | ||||
|     internal::scalar_product_op<T> prod_op; | ||||
|     (*accum) = prod_op(*accum, t); | ||||
|   } | ||||
|   template <typename Packet> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { | ||||
|     (*accum) = pmul<Packet>(*accum, p); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { | ||||
|     internal::scalar_cast_op<int, T> conv; | ||||
|     return conv(1); | ||||
|   } | ||||
|   template <typename Packet> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { | ||||
|     return pset1<Packet>(initialize()); | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { | ||||
|     return accum; | ||||
|   } | ||||
|   template <typename Packet> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const { | ||||
|     return vaccum; | ||||
|   } | ||||
|   template <typename Packet> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { | ||||
|     internal::scalar_product_op<T> prod_op; | ||||
|     return prod_op(saccum, predux_mul(vaccum)); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename T, typename Device> | ||||
| struct reducer_traits<ProdReducer<T>, Device> { | ||||
|   enum { | ||||
|     Cost = NumTraits<T>::MulCost, | ||||
|     PacketAccess = PacketType<T, Device>::HasMul | ||||
|   }; | ||||
| }; | ||||
|  | ||||
|  | ||||
| struct AndReducer | ||||
| { | ||||
|   static const bool PacketAccess = false; | ||||
|   static const bool IsStateful = false; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const { | ||||
|     *accum = *accum && t; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool initialize() const { | ||||
|     return true; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool finalize(bool accum) const { | ||||
|     return accum; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename Device> | ||||
| struct reducer_traits<AndReducer, Device> { | ||||
|   enum { | ||||
|     Cost = 1, | ||||
|     PacketAccess = false | ||||
|   }; | ||||
| }; | ||||
|  | ||||
|  | ||||
| struct OrReducer { | ||||
|   static const bool PacketAccess = false; | ||||
|   static const bool IsStateful = false; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const { | ||||
|     *accum = *accum || t; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool initialize() const { | ||||
|     return false; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool finalize(bool accum) const { | ||||
|     return accum; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename Device> | ||||
| struct reducer_traits<OrReducer, Device> { | ||||
|   enum { | ||||
|     Cost = 1, | ||||
|     PacketAccess = false | ||||
|   }; | ||||
| }; | ||||
|  | ||||
|  | ||||
| // Argmin/Argmax reducers | ||||
| template <typename T> struct ArgMaxTupleReducer | ||||
| { | ||||
|   static const bool PacketAccess = false; | ||||
|   static const bool IsStateful = false; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { | ||||
|     if (t.second > accum->second) { *accum = t; } | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { | ||||
|     return T(0, NumTraits<typename T::second_type>::lowest()); | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T& accum) const { | ||||
|     return accum; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename T, typename Device> | ||||
| struct reducer_traits<ArgMaxTupleReducer<T>, Device> { | ||||
|   enum { | ||||
|     Cost = NumTraits<T>::AddCost, | ||||
|     PacketAccess = false | ||||
|   }; | ||||
| }; | ||||
|  | ||||
|  | ||||
| template <typename T> struct ArgMinTupleReducer | ||||
| { | ||||
|   static const bool PacketAccess = false; | ||||
|   static const bool IsStateful = false; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T& t, T* accum) const { | ||||
|     if (t.second < accum->second) { *accum = t; } | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { | ||||
|     return T(0, NumTraits<typename T::second_type>::highest()); | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T& accum) const { | ||||
|     return accum; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename T, typename Device> | ||||
| struct reducer_traits<ArgMinTupleReducer<T>, Device> { | ||||
|   enum { | ||||
|     Cost = NumTraits<T>::AddCost, | ||||
|     PacketAccess = false | ||||
|   }; | ||||
| }; | ||||
|  | ||||
|  | ||||
| template <typename T, typename Index, size_t NumDims> | ||||
| class GaussianGenerator { | ||||
|  public: | ||||
|   static const bool PacketAccess = false; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC GaussianGenerator(const array<T, NumDims>& means, | ||||
|                                       const array<T, NumDims>& std_devs) | ||||
|       : m_means(means) | ||||
|   { | ||||
|     for (size_t i = 0; i < NumDims; ++i) { | ||||
|       m_two_sigmas[i] = std_devs[i] * std_devs[i] * 2; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC T operator()(const array<Index, NumDims>& coordinates) const { | ||||
|     T tmp = T(0); | ||||
|     for (size_t i = 0; i < NumDims; ++i) { | ||||
|       T offset = coordinates[i] - m_means[i]; | ||||
|       tmp += offset * offset / m_two_sigmas[i]; | ||||
|     } | ||||
|     return numext::exp(-tmp); | ||||
|   } | ||||
|  | ||||
|  private: | ||||
|   array<T, NumDims> m_means; | ||||
|   array<T, NumDims> m_two_sigmas; | ||||
| }; | ||||
|  | ||||
| template <typename T, typename Index, size_t NumDims> | ||||
| struct functor_traits<GaussianGenerator<T, Index, NumDims> > { | ||||
|   enum { | ||||
|     Cost = NumDims * (2 * NumTraits<T>::AddCost + NumTraits<T>::MulCost + | ||||
|                       functor_traits<scalar_quotient_op<T, T> >::Cost) + | ||||
|            functor_traits<scalar_exp_op<T> >::Cost, | ||||
|     PacketAccess = GaussianGenerator<T, Index, NumDims>::PacketAccess | ||||
|   }; | ||||
| }; | ||||
|  | ||||
| } // end namespace internal | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H | ||||
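All the reducers in this header share one protocol: initialize() supplies the identity value, reduce() folds one coefficient into the accumulator, and finalize() post-processes the result (the packet variants mirror this for SIMD lanes; MeanReducer is the stateful exception because it counts coefficients). The sketch below shows how a caller could drive that protocol over a plain array; the *Demo reducers and reduce_all() are illustrative stand-ins, not the Eigen reduction evaluator.

// Driving the initialize/reduce/finalize contract over a raw array.
#include <cstddef>
#include <iostream>

template <typename T> struct SumReducerDemo {
  T    initialize() const          { return T(0); }
  void reduce(T t, T* accum) const { *accum += t; }
  T    finalize(T accum) const     { return accum; }
};

template <typename T> struct MaxReducerDemo {
  T    initialize() const          { return T(-1e30); }  // stand-in for NumTraits<T>::lowest()
  void reduce(T t, T* accum) const { if (t > *accum) *accum = t; }
  T    finalize(T accum) const     { return accum; }
};

// Generic scalar reduction loop: the shape of the contract the tensor
// reduction code relies on.
template <typename T, typename Reducer>
T reduce_all(const T* data, std::size_t n, Reducer reducer) {
  T accum = reducer.initialize();
  for (std::size_t i = 0; i < n; ++i) reducer.reduce(data[i], &accum);
  return reducer.finalize(accum);
}

int main() {
  const float v[4] = {1.f, 4.f, 2.f, 3.f};
  std::cout << reduce_all(v, 4, SumReducerDemo<float>()) << "\n";  // 10
  std::cout << reduce_all(v, 4, MaxReducerDemo<float>()) << "\n";  // 4
}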
185 external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h vendored Normal file
							| @@ -0,0 +1,185 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \class TensorGeneratorOp | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Tensor generator class. | ||||
|   * Lazily builds a tensor whose coefficients are obtained by calling a | ||||
|   * user-supplied generator functor with the coordinates of each element. | ||||
|   * | ||||
|   */ | ||||
| namespace internal { | ||||
| template<typename Generator, typename XprType> | ||||
| struct traits<TensorGeneratorOp<Generator, XprType> > : public traits<XprType> | ||||
| { | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef traits<XprType> XprTraits; | ||||
|   typedef typename XprTraits::StorageKind StorageKind; | ||||
|   typedef typename XprTraits::Index Index; | ||||
|   typedef typename XprType::Nested Nested; | ||||
|   typedef typename remove_reference<Nested>::type _Nested; | ||||
|   static const int NumDimensions = XprTraits::NumDimensions; | ||||
|   static const int Layout = XprTraits::Layout; | ||||
| }; | ||||
|  | ||||
| template<typename Generator, typename XprType> | ||||
| struct eval<TensorGeneratorOp<Generator, XprType>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorGeneratorOp<Generator, XprType>& type; | ||||
| }; | ||||
|  | ||||
| template<typename Generator, typename XprType> | ||||
| struct nested<TensorGeneratorOp<Generator, XprType>, 1, typename eval<TensorGeneratorOp<Generator, XprType> >::type> | ||||
| { | ||||
|   typedef TensorGeneratorOp<Generator, XprType> type; | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
|  | ||||
|  | ||||
| template<typename Generator, typename XprType> | ||||
| class TensorGeneratorOp : public TensorBase<TensorGeneratorOp<Generator, XprType>, ReadOnlyAccessors> | ||||
| { | ||||
|   public: | ||||
|   typedef typename Eigen::internal::traits<TensorGeneratorOp>::Scalar Scalar; | ||||
|   typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename Eigen::internal::nested<TensorGeneratorOp>::type Nested; | ||||
|   typedef typename Eigen::internal::traits<TensorGeneratorOp>::StorageKind StorageKind; | ||||
|   typedef typename Eigen::internal::traits<TensorGeneratorOp>::Index Index; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorGeneratorOp(const XprType& expr, const Generator& generator) | ||||
|       : m_xpr(expr), m_generator(generator) {} | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const Generator& generator() const { return m_generator; } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const typename internal::remove_all<typename XprType::Nested>::type& | ||||
|     expression() const { return m_xpr; } | ||||
|  | ||||
|   protected: | ||||
|     typename XprType::Nested m_xpr; | ||||
|     const Generator m_generator; | ||||
| }; | ||||
|  | ||||
|  | ||||
| // Eval as rvalue | ||||
| template<typename Generator, typename ArgType, typename Device> | ||||
| struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device> | ||||
| { | ||||
|   typedef TensorGeneratorOp<Generator, ArgType> XprType; | ||||
|   typedef typename XprType::Index Index; | ||||
|   typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions; | ||||
|   static const int NumDims = internal::array_size<Dimensions>::value; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   enum { | ||||
|     IsAligned = false, | ||||
|     PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1), | ||||
|     BlockAccess = false, | ||||
|     Layout = TensorEvaluator<ArgType, Device>::Layout, | ||||
|     CoordAccess = false,  // to be implemented | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) | ||||
|       : m_generator(op.generator()) | ||||
|   { | ||||
|     TensorEvaluator<ArgType, Device> impl(op.expression(), device); | ||||
|     m_dimensions = impl.dimensions(); | ||||
|  | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       m_strides[0] = 1; | ||||
|       for (int i = 1; i < NumDims; ++i) { | ||||
|         m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1]; | ||||
|       } | ||||
|     } else { | ||||
|       m_strides[NumDims - 1] = 1; | ||||
|       for (int i = NumDims - 2; i >= 0; --i) { | ||||
|         m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1]; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { | ||||
|     return true; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const | ||||
|   { | ||||
|     array<Index, NumDims> coords; | ||||
|     extract_coordinates(index, coords); | ||||
|     return m_generator(coords); | ||||
|   } | ||||
|  | ||||
|   template<int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const | ||||
|   { | ||||
|     const int packetSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|     EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     eigen_assert(index+packetSize-1 < dimensions().TotalSize()); | ||||
|  | ||||
|     EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize]; | ||||
|     for (int i = 0; i < packetSize; ++i) { | ||||
|       values[i] = coeff(index+i); | ||||
|     } | ||||
|     PacketReturnType rslt = internal::pload<PacketReturnType>(values); | ||||
|     return rslt; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost | ||||
|   costPerCoeff(bool) const { | ||||
|     // TODO(rmlarsen): This is just a placeholder. Define interface to make | ||||
|     // generators return their cost. | ||||
|     return TensorOpCost(0, 0, TensorOpCost::AddCost<Scalar>() + | ||||
|                                   TensorOpCost::MulCost<Scalar>()); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } | ||||
|  | ||||
|  protected: | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   void extract_coordinates(Index index, array<Index, NumDims>& coords) const { | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       for (int i = NumDims - 1; i > 0; --i) { | ||||
|         const Index idx = index / m_strides[i]; | ||||
|         index -= idx * m_strides[i]; | ||||
|         coords[i] = idx; | ||||
|       } | ||||
|       coords[0] = index; | ||||
|     } else { | ||||
|       for (int i = 0; i < NumDims - 1; ++i) { | ||||
|         const Index idx = index / m_strides[i]; | ||||
|         index -= idx * m_strides[i]; | ||||
|         coords[i] = idx; | ||||
|       } | ||||
|       coords[NumDims-1] = index; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   Dimensions m_dimensions; | ||||
|   array<Index, NumDims> m_strides; | ||||
|   Generator m_generator; | ||||
| }; | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H | ||||
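The generator evaluator above computes coefficients on demand: coeff(index) converts the linear index into per-dimension coordinates with extract_coordinates() (using column-major or row-major strides) and hands them to the generator functor, which is exactly the call signature GaussianGenerator implements. The standalone sketch below reproduces the column-major branch of that mapping with std::array and a made-up SumOfCoordsGenerator; it assumes nothing beyond the standard library.

// Column-major linear index -> coordinates, then evaluate a generator there.
#include <array>
#include <cstddef>
#include <iostream>

constexpr std::size_t kDims = 3;

// Column-major strides: stride[0] = 1, stride[i] = stride[i-1] * dim[i-1].
std::array<long, kDims> col_major_strides(const std::array<long, kDims>& dims) {
  std::array<long, kDims> strides{};
  strides[0] = 1;
  for (std::size_t i = 1; i < kDims; ++i) strides[i] = strides[i - 1] * dims[i - 1];
  return strides;
}

// Mirrors the ColMajor branch of extract_coordinates().
std::array<long, kDims> extract_coords(long index, const std::array<long, kDims>& strides) {
  std::array<long, kDims> coords{};
  for (std::size_t i = kDims - 1; i > 0; --i) {
    const long idx = index / strides[i];
    index -= idx * strides[i];
    coords[i] = idx;
  }
  coords[0] = index;
  return coords;
}

// A toy generator with the same call shape as GaussianGenerator::operator().
struct SumOfCoordsGenerator {
  double operator()(const std::array<long, kDims>& c) const {
    return static_cast<double>(c[0] + c[1] + c[2]);
  }
};

int main() {
  const std::array<long, kDims> dims = {4, 3, 2};
  const auto strides = col_major_strides(dims);
  SumOfCoordsGenerator gen;
  // Linear index 13 in a 4x3x2 column-major tensor maps to coordinates (1, 0, 1).
  const auto coords = extract_coords(13, strides);
  std::cout << coords[0] << "," << coords[1] << "," << coords[2]
            << " -> " << gen(coords) << "\n";  // prints 1,0,1 -> 2
}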
33 external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h vendored Normal file
							| @@ -0,0 +1,33 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2016 Eugene Brevdo <ebrevdo@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \cpp11 \returns an expression of the coefficient-wise betainc(\a x, \a a, \a b) of the given tensors. | ||||
|  * | ||||
|  * This function computes the regularized incomplete beta function (integral). | ||||
|  * | ||||
|  */ | ||||
| template <typename ADerived, typename BDerived, typename XDerived> | ||||
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const | ||||
|     TensorCwiseTernaryOp<internal::scalar_betainc_op<typename XDerived::Scalar>, | ||||
|                          const ADerived, const BDerived, const XDerived> | ||||
|     betainc(const ADerived& a, const BDerived& b, const XDerived& x) { | ||||
|   return TensorCwiseTernaryOp< | ||||
|       internal::scalar_betainc_op<typename XDerived::Scalar>, const ADerived, | ||||
|       const BDerived, const XDerived>( | ||||
|       a, b, x, internal::scalar_betainc_op<typename XDerived::Scalar>()); | ||||
| } | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H | ||||
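As a quick illustration of the free function above, the sketch below builds three rank-1 tensors and assigns the lazily-built betainc expression to a result tensor. It assumes the unsupported Tensor module is included via <unsupported/Eigen/CXX11/Tensor> and that scalar_betainc_op is available for float (i.e. a C++11 build with Eigen's special functions).

// Usage sketch for Eigen::betainc() on rank-1 tensors (assumptions above).
#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<float, 1> a(4), b(4), x(4);
  a.setConstant(2.0f);                      // first shape parameter
  b.setConstant(5.0f);                      // second shape parameter
  x.setValues({0.1f, 0.3f, 0.5f, 0.9f});    // evaluation points

  // betainc() lazily builds a TensorCwiseTernaryOp; assigning it to a
  // Tensor triggers evaluation.
  Eigen::Tensor<float, 1> result = Eigen::betainc(a, b, x);
  for (int i = 0; i < 4; ++i) std::cout << result(i) << "\n";
}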
79 external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h vendored Normal file
							| @@ -0,0 +1,79 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_IO_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_IO_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| namespace internal { | ||||
|  | ||||
| // Print the tensor as a 2d matrix | ||||
| template <typename Tensor, int Rank> | ||||
| struct TensorPrinter { | ||||
|   static void run (std::ostream& os, const Tensor& tensor) { | ||||
|     typedef typename internal::remove_const<typename Tensor::Scalar>::type Scalar; | ||||
|     typedef typename Tensor::Index Index; | ||||
|     const Index total_size = internal::array_prod(tensor.dimensions()); | ||||
|     if (total_size > 0) { | ||||
|       const Index first_dim = Eigen::internal::array_get<0>(tensor.dimensions()); | ||||
|       static const int layout = Tensor::Layout; | ||||
|       Map<const Array<Scalar, Dynamic, Dynamic, layout> > matrix(const_cast<Scalar*>(tensor.data()), first_dim, total_size/first_dim); | ||||
|       os << matrix; | ||||
|     } | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| // Print the tensor as a vector | ||||
| template <typename Tensor> | ||||
| struct TensorPrinter<Tensor, 1> { | ||||
|   static void run (std::ostream& os, const Tensor& tensor) { | ||||
|     typedef typename internal::remove_const<typename Tensor::Scalar>::type Scalar; | ||||
|     typedef typename Tensor::Index Index; | ||||
|     const Index total_size = internal::array_prod(tensor.dimensions()); | ||||
|     if (total_size > 0) { | ||||
|       Map<const Array<Scalar, Dynamic, 1> > array(const_cast<Scalar*>(tensor.data()), total_size); | ||||
|       os << array; | ||||
|     } | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| // Print the tensor as a scalar | ||||
| template <typename Tensor> | ||||
| struct TensorPrinter<Tensor, 0> { | ||||
|   static void run (std::ostream& os, const Tensor& tensor) { | ||||
|     os << tensor.coeff(0); | ||||
|   } | ||||
| }; | ||||
| } | ||||
|  | ||||
| template <typename T> | ||||
| std::ostream& operator << (std::ostream& os, const TensorBase<T, ReadOnlyAccessors>& expr) { | ||||
|   typedef TensorEvaluator<const TensorForcedEvalOp<const T>, DefaultDevice> Evaluator; | ||||
|   typedef typename Evaluator::Dimensions Dimensions; | ||||
|  | ||||
|   // Evaluate the expression if needed | ||||
|   TensorForcedEvalOp<const T> eval = expr.eval(); | ||||
|   Evaluator tensor(eval, DefaultDevice()); | ||||
|   tensor.evalSubExprsIfNeeded(NULL); | ||||
|  | ||||
|   // Print the result | ||||
|   static const int rank = internal::array_size<Dimensions>::value; | ||||
|   internal::TensorPrinter<Evaluator, rank>::run(os, tensor); | ||||
|  | ||||
|   // Cleanup. | ||||
|   tensor.cleanup(); | ||||
|   return os; | ||||
| } | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_IO_H | ||||
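The operator<< defined above force-evaluates any read-only tensor expression on the DefaultDevice and then delegates to TensorPrinter, which formats rank 0 as a scalar, rank 1 as a vector, and higher ranks as a 2-D matrix. A small usage sketch follows, again assuming the tensor module header <unsupported/Eigen/CXX11/Tensor>.

// Streaming tensors and tensor expressions (assumptions above).
#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<int, 2> t(2, 3);
  t.setValues({{1, 2, 3}, {4, 5, 6}});

  // Rank 2: printed as a 2x3 matrix by TensorPrinter<Evaluator, 2>.
  std::cout << t << "\n\n";

  // Expressions work too; operator<< forces evaluation first.
  std::cout << (t + t.constant(10)) << "\n";
}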
509 external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h vendored Normal file
							| @@ -0,0 +1,509 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_IMAGE_PATCH_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_IMAGE_PATCH_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \class TensorImagePatch | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Patch extraction specialized for image processing. | ||||
|   * This assumes that the input has at least 3 dimensions ordered as follows: | ||||
|   *  1st dimension: channels (of size d) | ||||
|   *  2nd dimension: rows (of size r) | ||||
|   *  3rd dimension: columns (of size c) | ||||
|   *  There can be additional dimensions such as time (for video) or batch (for | ||||
|   * bulk processing) after the first 3. | ||||
|   * Calling the image patch code with patch_rows and patch_cols is equivalent | ||||
|   * to calling the regular patch extraction code with parameters d, patch_rows, | ||||
|   * patch_cols, and 1 for all the additional dimensions. | ||||
|   */ | ||||
| namespace internal { | ||||
| template<DenseIndex Rows, DenseIndex Cols, typename XprType> | ||||
| struct traits<TensorImagePatchOp<Rows, Cols, XprType> > : public traits<XprType> | ||||
| { | ||||
|   typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar; | ||||
|   typedef traits<XprType> XprTraits; | ||||
|   typedef typename XprTraits::StorageKind StorageKind; | ||||
|   typedef typename XprTraits::Index Index; | ||||
|   typedef typename XprType::Nested Nested; | ||||
|   typedef typename remove_reference<Nested>::type _Nested; | ||||
|   static const int NumDimensions = XprTraits::NumDimensions + 1; | ||||
|   static const int Layout = XprTraits::Layout; | ||||
| }; | ||||
|  | ||||
| template<DenseIndex Rows, DenseIndex Cols, typename XprType> | ||||
| struct eval<TensorImagePatchOp<Rows, Cols, XprType>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorImagePatchOp<Rows, Cols, XprType>& type; | ||||
| }; | ||||
|  | ||||
| template<DenseIndex Rows, DenseIndex Cols, typename XprType> | ||||
| struct nested<TensorImagePatchOp<Rows, Cols, XprType>, 1, typename eval<TensorImagePatchOp<Rows, Cols, XprType> >::type> | ||||
| { | ||||
|   typedef TensorImagePatchOp<Rows, Cols, XprType> type; | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
| template<DenseIndex Rows, DenseIndex Cols, typename XprType> | ||||
| class TensorImagePatchOp : public TensorBase<TensorImagePatchOp<Rows, Cols, XprType>, ReadOnlyAccessors> | ||||
| { | ||||
|   public: | ||||
|   typedef typename Eigen::internal::traits<TensorImagePatchOp>::Scalar Scalar; | ||||
|   typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename Eigen::internal::nested<TensorImagePatchOp>::type Nested; | ||||
|   typedef typename Eigen::internal::traits<TensorImagePatchOp>::StorageKind StorageKind; | ||||
|   typedef typename Eigen::internal::traits<TensorImagePatchOp>::Index Index; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorImagePatchOp(const XprType& expr, DenseIndex patch_rows, DenseIndex patch_cols, | ||||
|                                                            DenseIndex row_strides, DenseIndex col_strides, | ||||
|                                                            DenseIndex in_row_strides, DenseIndex in_col_strides, | ||||
|                                                            DenseIndex row_inflate_strides, DenseIndex col_inflate_strides, | ||||
|                                                            PaddingType padding_type, Scalar padding_value) | ||||
|       : m_xpr(expr), m_patch_rows(patch_rows), m_patch_cols(patch_cols), | ||||
|         m_row_strides(row_strides), m_col_strides(col_strides), | ||||
|         m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides), | ||||
|         m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides), | ||||
|         m_padding_explicit(false), m_padding_top(0), m_padding_bottom(0), m_padding_left(0), m_padding_right(0), | ||||
|         m_padding_type(padding_type), m_padding_value(padding_value) {} | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorImagePatchOp(const XprType& expr, DenseIndex patch_rows, DenseIndex patch_cols, | ||||
|                                                            DenseIndex row_strides, DenseIndex col_strides, | ||||
|                                                            DenseIndex in_row_strides, DenseIndex in_col_strides, | ||||
|                                                            DenseIndex row_inflate_strides, DenseIndex col_inflate_strides, | ||||
|                                                            DenseIndex padding_top, DenseIndex padding_bottom, | ||||
|                                                            DenseIndex padding_left, DenseIndex padding_right, | ||||
|                                                            Scalar padding_value) | ||||
|       : m_xpr(expr), m_patch_rows(patch_rows), m_patch_cols(patch_cols), | ||||
|         m_row_strides(row_strides), m_col_strides(col_strides), | ||||
|         m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides), | ||||
|         m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides), | ||||
|         m_padding_explicit(true), m_padding_top(padding_top), m_padding_bottom(padding_bottom), | ||||
|         m_padding_left(padding_left), m_padding_right(padding_right), | ||||
|         m_padding_type(PADDING_VALID), m_padding_value(padding_value) {} | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     DenseIndex patch_rows() const { return m_patch_rows; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     DenseIndex patch_cols() const { return m_patch_cols; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     DenseIndex row_strides() const { return m_row_strides; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     DenseIndex col_strides() const { return m_col_strides; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     DenseIndex in_row_strides() const { return m_in_row_strides; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     DenseIndex in_col_strides() const { return m_in_col_strides; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     DenseIndex row_inflate_strides() const { return m_row_inflate_strides; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     DenseIndex col_inflate_strides() const { return m_col_inflate_strides; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     bool padding_explicit() const { return m_padding_explicit; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     DenseIndex padding_top() const { return m_padding_top; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     DenseIndex padding_bottom() const { return m_padding_bottom; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     DenseIndex padding_left() const { return m_padding_left; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     DenseIndex padding_right() const { return m_padding_right; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     PaddingType padding_type() const { return m_padding_type; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     Scalar padding_value() const { return m_padding_value; } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const typename internal::remove_all<typename XprType::Nested>::type& | ||||
|     expression() const { return m_xpr; } | ||||
|  | ||||
|   protected: | ||||
|     typename XprType::Nested m_xpr; | ||||
|     const DenseIndex m_patch_rows; | ||||
|     const DenseIndex m_patch_cols; | ||||
|     const DenseIndex m_row_strides; | ||||
|     const DenseIndex m_col_strides; | ||||
|     const DenseIndex m_in_row_strides; | ||||
|     const DenseIndex m_in_col_strides; | ||||
|     const DenseIndex m_row_inflate_strides; | ||||
|     const DenseIndex m_col_inflate_strides; | ||||
|     const bool m_padding_explicit; | ||||
|     const DenseIndex m_padding_top; | ||||
|     const DenseIndex m_padding_bottom; | ||||
|     const DenseIndex m_padding_left; | ||||
|     const DenseIndex m_padding_right; | ||||
|     const PaddingType m_padding_type; | ||||
|     const Scalar m_padding_value; | ||||
| }; | ||||
|  | ||||
| // Eval as rvalue | ||||
| template<DenseIndex Rows, DenseIndex Cols, typename ArgType, typename Device> | ||||
| struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device> | ||||
| { | ||||
|   typedef TensorImagePatchOp<Rows, Cols, ArgType> XprType; | ||||
|   typedef typename XprType::Index Index; | ||||
|   static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; | ||||
|   static const int NumDims = NumInputDims + 1; | ||||
|   typedef DSizes<Index, NumDims> Dimensions; | ||||
|   typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar; | ||||
|   typedef TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, | ||||
|                           Device> Self; | ||||
|   typedef TensorEvaluator<ArgType, Device> Impl; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = false, | ||||
|     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, | ||||
|     Layout = TensorEvaluator<ArgType, Device>::Layout, | ||||
|     CoordAccess = false, | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) | ||||
|       : m_impl(op.expression(), device) | ||||
|   { | ||||
|     EIGEN_STATIC_ASSERT((NumDims >= 4), YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|  | ||||
|     m_paddingValue = op.padding_value(); | ||||
|  | ||||
|     const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); | ||||
|  | ||||
|     // Caches a few variables. | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       m_inputDepth = input_dims[0]; | ||||
|       m_inputRows = input_dims[1]; | ||||
|       m_inputCols = input_dims[2]; | ||||
|     } else { | ||||
|       m_inputDepth = input_dims[NumInputDims-1]; | ||||
|       m_inputRows = input_dims[NumInputDims-2]; | ||||
|       m_inputCols = input_dims[NumInputDims-3]; | ||||
|     } | ||||
|  | ||||
|     m_row_strides = op.row_strides(); | ||||
|     m_col_strides = op.col_strides(); | ||||
|  | ||||
|     // Input strides and effective input/patch size | ||||
|     m_in_row_strides = op.in_row_strides(); | ||||
|     m_in_col_strides = op.in_col_strides(); | ||||
|     m_row_inflate_strides = op.row_inflate_strides(); | ||||
|     m_col_inflate_strides = op.col_inflate_strides(); | ||||
|     // The "effective" input rows and input cols are the input rows and cols | ||||
|     // after inflating them with zeros. | ||||
|     // For example, a 2x3 matrix with row_inflate_strides and | ||||
|     // col_inflate_strides of 2 comes from: | ||||
|     //   A B C | ||||
|     //   D E F | ||||
|     // | ||||
|     // to the following 3x5 matrix: | ||||
|     // | ||||
|     //   A . B . C | ||||
|     //   . . . . . | ||||
|     //   D . E . F | ||||
|  | ||||
|     m_input_rows_eff = (m_inputRows - 1) * m_row_inflate_strides + 1; | ||||
|     m_input_cols_eff = (m_inputCols - 1) * m_col_inflate_strides + 1; | ||||
|     m_patch_rows_eff = op.patch_rows() + (op.patch_rows() - 1) * (m_in_row_strides - 1); | ||||
|     m_patch_cols_eff = op.patch_cols() + (op.patch_cols() - 1) * (m_in_col_strides - 1); | ||||
|  | ||||
|     if (op.padding_explicit()) { | ||||
|       m_outputRows = numext::ceil((m_input_rows_eff + op.padding_top() + op.padding_bottom() - m_patch_rows_eff + 1.f) / static_cast<float>(m_row_strides)); | ||||
|       m_outputCols = numext::ceil((m_input_cols_eff + op.padding_left() + op.padding_right() - m_patch_cols_eff + 1.f) / static_cast<float>(m_col_strides)); | ||||
|       m_rowPaddingTop = op.padding_top(); | ||||
|       m_colPaddingLeft = op.padding_left(); | ||||
|     } else { | ||||
|       // Computing padding from the type | ||||
|       switch (op.padding_type()) { | ||||
|         case PADDING_VALID: | ||||
|           m_outputRows = numext::ceil((m_input_rows_eff - m_patch_rows_eff + 1.f) / static_cast<float>(m_row_strides)); | ||||
|           m_outputCols = numext::ceil((m_input_cols_eff - m_patch_cols_eff + 1.f) / static_cast<float>(m_col_strides)); | ||||
|           // Calculate the padding | ||||
|           m_rowPaddingTop = numext::maxi<Index>(0, ((m_outputRows - 1) * m_row_strides + m_patch_rows_eff - m_input_rows_eff) / 2); | ||||
|           m_colPaddingLeft = numext::maxi<Index>(0, ((m_outputCols - 1) * m_col_strides + m_patch_cols_eff - m_input_cols_eff) / 2); | ||||
|           break; | ||||
|         case PADDING_SAME: | ||||
|           m_outputRows = numext::ceil(m_input_rows_eff / static_cast<float>(m_row_strides)); | ||||
|           m_outputCols = numext::ceil(m_input_cols_eff / static_cast<float>(m_col_strides)); | ||||
|           // Calculate the padding | ||||
|           m_rowPaddingTop = ((m_outputRows - 1) * m_row_strides + m_patch_rows_eff - m_input_rows_eff) / 2; | ||||
|           m_colPaddingLeft = ((m_outputCols - 1) * m_col_strides + m_patch_cols_eff - m_input_cols_eff) / 2; | ||||
|           break; | ||||
|         default: | ||||
|           eigen_assert(false && "unexpected padding"); | ||||
|       } | ||||
|     } | ||||
|     eigen_assert(m_outputRows > 0); | ||||
|     eigen_assert(m_outputCols > 0); | ||||
|  | ||||
|     // Dimensions for result of extraction. | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       // ColMajor | ||||
|       // 0: depth | ||||
|       // 1: patch_rows | ||||
|       // 2: patch_cols | ||||
|       // 3: number of patches | ||||
|       // 4 and beyond: anything else (such as batch). | ||||
|       m_dimensions[0] = input_dims[0]; | ||||
|       m_dimensions[1] = op.patch_rows(); | ||||
|       m_dimensions[2] = op.patch_cols(); | ||||
|       m_dimensions[3] = m_outputRows * m_outputCols; | ||||
|       for (int i = 4; i < NumDims; ++i) { | ||||
|         m_dimensions[i] = input_dims[i-1]; | ||||
|       } | ||||
|     } else { | ||||
|       // RowMajor | ||||
|       // NumDims-1: depth | ||||
|       // NumDims-2: patch_rows | ||||
|       // NumDims-3: patch_cols | ||||
|       // NumDims-4: number of patches | ||||
|       // NumDims-5 and beyond: anything else (such as batch). | ||||
|       m_dimensions[NumDims-1] = input_dims[NumInputDims-1]; | ||||
|       m_dimensions[NumDims-2] = op.patch_rows(); | ||||
|       m_dimensions[NumDims-3] = op.patch_cols(); | ||||
|       m_dimensions[NumDims-4] = m_outputRows * m_outputCols; | ||||
|       for (int i = NumDims-5; i >= 0; --i) { | ||||
|         m_dimensions[i] = input_dims[i]; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     // Strides for moving the patch in various dimensions. | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       m_colStride = m_dimensions[1]; | ||||
|       m_patchStride = m_colStride * m_dimensions[2] * m_dimensions[0]; | ||||
|       m_otherStride = m_patchStride * m_dimensions[3]; | ||||
|     } else { | ||||
|       m_colStride = m_dimensions[NumDims-2]; | ||||
|       m_patchStride = m_colStride * m_dimensions[NumDims-3] * m_dimensions[NumDims-1]; | ||||
|       m_otherStride = m_patchStride * m_dimensions[NumDims-4]; | ||||
|     } | ||||
|  | ||||
|     // Strides for navigating through the input tensor. | ||||
|     m_rowInputStride = m_inputDepth; | ||||
|     m_colInputStride = m_inputDepth * m_inputRows; | ||||
|     m_patchInputStride = m_inputDepth * m_inputRows * m_inputCols; | ||||
|  | ||||
|     // Fast representations of different variables. | ||||
|     m_fastOtherStride = internal::TensorIntDivisor<Index>(m_otherStride); | ||||
|     m_fastPatchStride = internal::TensorIntDivisor<Index>(m_patchStride); | ||||
|     m_fastColStride = internal::TensorIntDivisor<Index>(m_colStride); | ||||
|     m_fastInflateRowStride = internal::TensorIntDivisor<Index>(m_row_inflate_strides); | ||||
|     m_fastInflateColStride = internal::TensorIntDivisor<Index>(m_col_inflate_strides); | ||||
|     m_fastInputColsEff = internal::TensorIntDivisor<Index>(m_input_cols_eff); | ||||
|  | ||||
|     // Number of patches in the width dimension. | ||||
|     m_fastOutputRows = internal::TensorIntDivisor<Index>(m_outputRows); | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[0]); | ||||
|     } else { | ||||
|       m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[NumDims-1]); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { | ||||
|     m_impl.evalSubExprsIfNeeded(NULL); | ||||
|     return true; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { | ||||
|     m_impl.cleanup(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const | ||||
|   { | ||||
|     // Patch index corresponding to the passed in index. | ||||
|     const Index patchIndex = index / m_fastPatchStride; | ||||
|     // Find the offset of the element wrt the location of the first element. | ||||
|     const Index patchOffset = (index - patchIndex * m_patchStride) / m_fastOutputDepth; | ||||
|  | ||||
|     // Other ways to index this element. | ||||
|     const Index otherIndex = (NumDims == 4) ? 0 : index / m_fastOtherStride; | ||||
|     const Index patch2DIndex = (NumDims == 4) ? patchIndex : (index - otherIndex * m_otherStride) / m_fastPatchStride; | ||||
|  | ||||
|     // Calculate col index in the input original tensor. | ||||
|     const Index colIndex = patch2DIndex / m_fastOutputRows; | ||||
|     const Index colOffset = patchOffset / m_fastColStride; | ||||
|     const Index inputCol = colIndex * m_col_strides + colOffset * m_in_col_strides - m_colPaddingLeft; | ||||
|     const Index origInputCol = (m_col_inflate_strides == 1) ? inputCol : ((inputCol >= 0) ? (inputCol / m_fastInflateColStride) : 0); | ||||
|     if (inputCol < 0 || inputCol >= m_input_cols_eff || | ||||
|         ((m_col_inflate_strides != 1) && (inputCol != origInputCol * m_col_inflate_strides))) { | ||||
|       return Scalar(m_paddingValue); | ||||
|     } | ||||
|  | ||||
|     // Calculate row index in the original input tensor. | ||||
|     const Index rowIndex = patch2DIndex - colIndex * m_outputRows; | ||||
|     const Index rowOffset = patchOffset - colOffset * m_colStride; | ||||
|     const Index inputRow = rowIndex * m_row_strides + rowOffset * m_in_row_strides - m_rowPaddingTop; | ||||
|     const Index origInputRow = (m_row_inflate_strides == 1) ? inputRow : ((inputRow >= 0) ? (inputRow / m_fastInflateRowStride) : 0); | ||||
|     if (inputRow < 0 || inputRow >= m_input_rows_eff || | ||||
|         ((m_row_inflate_strides != 1) && (inputRow != origInputRow * m_row_inflate_strides))) { | ||||
|       return Scalar(m_paddingValue); | ||||
|     } | ||||
|  | ||||
|     const int depth_index = static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : NumDims - 1; | ||||
|     const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index]; | ||||
|  | ||||
|     const Index inputIndex = depth + origInputRow * m_rowInputStride + origInputCol * m_colInputStride + otherIndex * m_patchInputStride; | ||||
|     return m_impl.coeff(inputIndex); | ||||
|   } | ||||
|  | ||||
|   template<int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const | ||||
|   { | ||||
|     EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); | ||||
|  | ||||
|     if (m_in_row_strides != 1 || m_in_col_strides != 1 || m_row_inflate_strides != 1 || m_col_inflate_strides != 1) { | ||||
|       return packetWithPossibleZero(index); | ||||
|     } | ||||
|  | ||||
|     const Index indices[2] = {index, index + PacketSize - 1}; | ||||
|     const Index patchIndex = indices[0] / m_fastPatchStride; | ||||
|     if (patchIndex != indices[1] / m_fastPatchStride) { | ||||
|       return packetWithPossibleZero(index); | ||||
|     } | ||||
|     const Index otherIndex = (NumDims == 4) ? 0 : indices[0] / m_fastOtherStride; | ||||
|     eigen_assert(otherIndex == indices[1] / m_fastOtherStride); | ||||
|  | ||||
|     // Find the offset of the element wrt the location of the first element. | ||||
|     const Index patchOffsets[2] = {(indices[0] - patchIndex * m_patchStride) / m_fastOutputDepth, | ||||
|                                    (indices[1] - patchIndex * m_patchStride) / m_fastOutputDepth}; | ||||
|  | ||||
|     const Index patch2DIndex = (NumDims == 4) ? patchIndex : (indices[0] - otherIndex * m_otherStride) / m_fastPatchStride; | ||||
|     eigen_assert(patch2DIndex == (indices[1] - otherIndex * m_otherStride) / m_fastPatchStride); | ||||
|  | ||||
|     const Index colIndex = patch2DIndex / m_fastOutputRows; | ||||
|     const Index colOffsets[2] = {patchOffsets[0] / m_fastColStride, patchOffsets[1] / m_fastColStride}; | ||||
|  | ||||
|     // Calculate col indices in the original input tensor. | ||||
|     const Index inputCols[2] = {colIndex * m_col_strides + colOffsets[0] - | ||||
|       m_colPaddingLeft, colIndex * m_col_strides + colOffsets[1] - m_colPaddingLeft}; | ||||
|     if (inputCols[1] < 0 || inputCols[0] >= m_inputCols) { | ||||
|       return internal::pset1<PacketReturnType>(Scalar(m_paddingValue)); | ||||
|     } | ||||
|  | ||||
|     if (inputCols[0] == inputCols[1]) { | ||||
|       const Index rowIndex = patch2DIndex - colIndex * m_outputRows; | ||||
|       const Index rowOffsets[2] = {patchOffsets[0] - colOffsets[0]*m_colStride, patchOffsets[1] - colOffsets[1]*m_colStride}; | ||||
|       eigen_assert(rowOffsets[0] <= rowOffsets[1]); | ||||
|       // Calculate row indices in the original input tensor. | ||||
|       const Index inputRows[2] = {rowIndex * m_row_strides + rowOffsets[0] - | ||||
|         m_rowPaddingTop, rowIndex * m_row_strides + rowOffsets[1] - m_rowPaddingTop}; | ||||
|  | ||||
|       if (inputRows[1] < 0 || inputRows[0] >= m_inputRows) { | ||||
|         return internal::pset1<PacketReturnType>(Scalar(m_paddingValue)); | ||||
|       } | ||||
|  | ||||
|       if (inputRows[0] >= 0 && inputRows[1] < m_inputRows) { | ||||
|         // no padding | ||||
|         const int depth_index = static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : NumDims - 1; | ||||
|         const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index]; | ||||
|         const Index inputIndex = depth + inputRows[0] * m_rowInputStride + inputCols[0] * m_colInputStride + otherIndex * m_patchInputStride; | ||||
|         return m_impl.template packet<Unaligned>(inputIndex); | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     return packetWithPossibleZero(index); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } | ||||
|  | ||||
|   const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; } | ||||
|  | ||||
|   Index rowPaddingTop() const { return m_rowPaddingTop; } | ||||
|   Index colPaddingLeft() const { return m_colPaddingLeft; } | ||||
|   Index outputRows() const { return m_outputRows; } | ||||
|   Index outputCols() const { return m_outputCols; } | ||||
|   Index userRowStride() const { return m_row_strides; } | ||||
|   Index userColStride() const { return m_col_strides; } | ||||
|   Index userInRowStride() const { return m_in_row_strides; } | ||||
|   Index userInColStride() const { return m_in_col_strides; } | ||||
|   Index rowInflateStride() const { return m_row_inflate_strides; } | ||||
|   Index colInflateStride() const { return m_col_inflate_strides; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost | ||||
|   costPerCoeff(bool vectorized) const { | ||||
|     // We conservatively estimate the cost for the code path where the computed | ||||
|     // index is inside the original image and | ||||
|     // TensorEvaluator<ArgType, Device>::CoordAccess is false. | ||||
|     const double compute_cost = 3 * TensorOpCost::DivCost<Index>() + | ||||
|                                 6 * TensorOpCost::MulCost<Index>() + | ||||
|                                 8 * TensorOpCost::MulCost<Index>(); | ||||
|     return m_impl.costPerCoeff(vectorized) + | ||||
|            TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); | ||||
|   } | ||||
|  | ||||
|  protected: | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const | ||||
|   { | ||||
|     EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; | ||||
|     for (int i = 0; i < PacketSize; ++i) { | ||||
|       values[i] = coeff(index+i); | ||||
|     } | ||||
|     PacketReturnType rslt = internal::pload<PacketReturnType>(values); | ||||
|     return rslt; | ||||
|   } | ||||
|  | ||||
|   Dimensions m_dimensions; | ||||
|  | ||||
|   Index m_otherStride; | ||||
|   Index m_patchStride; | ||||
|   Index m_colStride; | ||||
|   Index m_row_strides; | ||||
|   Index m_col_strides; | ||||
|  | ||||
|   Index m_in_row_strides; | ||||
|   Index m_in_col_strides; | ||||
|   Index m_row_inflate_strides; | ||||
|   Index m_col_inflate_strides; | ||||
|  | ||||
|   Index m_input_rows_eff; | ||||
|   Index m_input_cols_eff; | ||||
|   Index m_patch_rows_eff; | ||||
|   Index m_patch_cols_eff; | ||||
|  | ||||
|   internal::TensorIntDivisor<Index> m_fastOtherStride; | ||||
|   internal::TensorIntDivisor<Index> m_fastPatchStride; | ||||
|   internal::TensorIntDivisor<Index> m_fastColStride; | ||||
|   internal::TensorIntDivisor<Index> m_fastInflateRowStride; | ||||
|   internal::TensorIntDivisor<Index> m_fastInflateColStride; | ||||
|   internal::TensorIntDivisor<Index> m_fastInputColsEff; | ||||
|  | ||||
|   Index m_rowInputStride; | ||||
|   Index m_colInputStride; | ||||
|   Index m_patchInputStride; | ||||
|  | ||||
|   Index m_inputDepth; | ||||
|   Index m_inputRows; | ||||
|   Index m_inputCols; | ||||
|  | ||||
|   Index m_outputRows; | ||||
|   Index m_outputCols; | ||||
|  | ||||
|   Index m_rowPaddingTop; | ||||
|   Index m_colPaddingLeft; | ||||
|  | ||||
|   internal::TensorIntDivisor<Index> m_fastOutputRows; | ||||
|   internal::TensorIntDivisor<Index> m_fastOutputDepth; | ||||
|  | ||||
|   Scalar m_paddingValue; | ||||
|  | ||||
|   TensorEvaluator<ArgType, Device> m_impl; | ||||
| }; | ||||
|  | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_IMAGE_PATCH_H | ||||
							
								
								
									
725  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h  vendored  Normal file
							| @@ -0,0 +1,725 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H | ||||
|  | ||||
|  | ||||
| #if EIGEN_HAS_CONSTEXPR && EIGEN_HAS_VARIADIC_TEMPLATES | ||||
|  | ||||
| #define EIGEN_HAS_INDEX_LIST | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \internal | ||||
|   * | ||||
|   * \class TensorIndexList | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Set of classes used to encode a set of Tensor dimensions/indices. | ||||
|   * | ||||
|   * The indices in the list can be known at compile time or at runtime. A mix | ||||
|   * of static and dynamic indices can also be provided if needed. The tensor | ||||
|   * code will attempt to take advantage of the indices that are known at | ||||
|   * compile time to optimize the code it generates. | ||||
|   * | ||||
|   * This functionality requires a C++11-compliant compiler. If your compiler | ||||
|   * is older you need to use arrays of indices instead. | ||||
|   * | ||||
|   * Several examples are provided in the cxx11_tensor_index_list.cpp file. | ||||
|   * | ||||
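|   * For illustration, a minimal sketch mixing one compile-time index with one | ||||
|   * runtime index (the tensor sizes below are arbitrary): | ||||
|   * \code | ||||
|   * Eigen::IndexList<Eigen::type2index<0>, int> reduction_dims; | ||||
|   * reduction_dims.set(1, 1);  // the second index is only known at runtime | ||||
|   * Eigen::Tensor<float, 3> input(4, 5, 6); | ||||
|   * input.setRandom(); | ||||
|   * // sums over dimensions 0 and 1, producing a tensor of shape (6) | ||||
|   * Eigen::Tensor<float, 1> result = input.sum(reduction_dims); | ||||
|   * \endcode | ||||
|   * | ||||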
|   * \sa Tensor | ||||
|   */ | ||||
|  | ||||
| template <DenseIndex n> | ||||
| struct type2index { | ||||
|   static const DenseIndex value = n; | ||||
|   EIGEN_DEVICE_FUNC constexpr operator DenseIndex() const { return n; } | ||||
|   EIGEN_DEVICE_FUNC void set(DenseIndex val) { | ||||
|     eigen_assert(val == n); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| // This can be used with IndexPairList to get compile-time constant pairs, | ||||
| // such as IndexPairList<type2indexpair<1,2>, type2indexpair<3,4>>(). | ||||
| template <DenseIndex f, DenseIndex s> | ||||
| struct type2indexpair { | ||||
|   static const DenseIndex first = f; | ||||
|   static const DenseIndex second = s; | ||||
|  | ||||
|   constexpr EIGEN_DEVICE_FUNC operator IndexPair<DenseIndex>() const { | ||||
|     return IndexPair<DenseIndex>(f, s); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC void set(const IndexPair<DenseIndex>& val) { | ||||
|     eigen_assert(val.first == f); | ||||
|     eigen_assert(val.second == s); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| template<DenseIndex n> struct NumTraits<type2index<n> > | ||||
| { | ||||
|   typedef DenseIndex Real; | ||||
|   enum { | ||||
|     IsComplex = 0, | ||||
|     RequireInitialization = false, | ||||
|     ReadCost = 1, | ||||
|     AddCost = 1, | ||||
|     MulCost = 1 | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC static inline Real epsilon() { return 0; } | ||||
|   EIGEN_DEVICE_FUNC static inline Real dummy_precision() { return 0; } | ||||
|   EIGEN_DEVICE_FUNC static inline Real highest() { return n; } | ||||
|   EIGEN_DEVICE_FUNC static inline Real lowest() { return n; } | ||||
| }; | ||||
|  | ||||
| namespace internal { | ||||
| template <typename T> | ||||
| EIGEN_DEVICE_FUNC void update_value(T& val, DenseIndex new_val) { | ||||
|   val = new_val; | ||||
| } | ||||
| template <DenseIndex n> | ||||
| EIGEN_DEVICE_FUNC void update_value(type2index<n>& val, DenseIndex new_val) { | ||||
|   val.set(new_val); | ||||
| } | ||||
|  | ||||
| template <typename T> | ||||
| EIGEN_DEVICE_FUNC void update_value(T& val, IndexPair<DenseIndex> new_val) { | ||||
|   val = new_val; | ||||
| } | ||||
| template <DenseIndex f, DenseIndex s> | ||||
| EIGEN_DEVICE_FUNC void update_value(type2indexpair<f, s>& val, IndexPair<DenseIndex> new_val) { | ||||
|   val.set(new_val); | ||||
| } | ||||
|  | ||||
|  | ||||
| template <typename T> | ||||
| struct is_compile_time_constant { | ||||
|   static constexpr bool value = false; | ||||
| }; | ||||
|  | ||||
| template <DenseIndex idx> | ||||
| struct is_compile_time_constant<type2index<idx> > { | ||||
|   static constexpr bool value = true; | ||||
| }; | ||||
| template <DenseIndex idx> | ||||
| struct is_compile_time_constant<const type2index<idx> > { | ||||
|   static constexpr bool value = true; | ||||
| }; | ||||
| template <DenseIndex idx> | ||||
| struct is_compile_time_constant<type2index<idx>& > { | ||||
|   static constexpr bool value = true; | ||||
| }; | ||||
| template <DenseIndex idx> | ||||
| struct is_compile_time_constant<const type2index<idx>& > { | ||||
|   static constexpr bool value = true; | ||||
| }; | ||||
|  | ||||
| template <DenseIndex f, DenseIndex s> | ||||
| struct is_compile_time_constant<type2indexpair<f, s> > { | ||||
|   static constexpr bool value = true; | ||||
| }; | ||||
| template <DenseIndex f, DenseIndex s> | ||||
| struct is_compile_time_constant<const type2indexpair<f, s> > { | ||||
|   static constexpr bool value = true; | ||||
| }; | ||||
| template <DenseIndex f, DenseIndex s> | ||||
| struct is_compile_time_constant<type2indexpair<f, s>& > { | ||||
|   static constexpr bool value = true; | ||||
| }; | ||||
| template <DenseIndex f, DenseIndex s> | ||||
| struct is_compile_time_constant<const type2indexpair<f, s>& > { | ||||
|   static constexpr bool value = true; | ||||
| }; | ||||
|  | ||||
|  | ||||
| template<typename... T> | ||||
| struct IndexTuple; | ||||
|  | ||||
| template<typename T, typename... O> | ||||
| struct IndexTuple<T, O...> { | ||||
|   EIGEN_DEVICE_FUNC constexpr IndexTuple() : head(), others() { } | ||||
|   EIGEN_DEVICE_FUNC constexpr IndexTuple(const T& v, const O... o) : head(v), others(o...) { } | ||||
|  | ||||
|   constexpr static int count = 1 + sizeof...(O); | ||||
|   T head; | ||||
|   IndexTuple<O...> others; | ||||
|   typedef T Head; | ||||
|   typedef IndexTuple<O...> Other; | ||||
| }; | ||||
|  | ||||
| template<typename T> | ||||
|   struct IndexTuple<T> { | ||||
|   EIGEN_DEVICE_FUNC constexpr IndexTuple() : head() { } | ||||
|   EIGEN_DEVICE_FUNC constexpr IndexTuple(const T& v) : head(v) { } | ||||
|  | ||||
|   constexpr static int count = 1; | ||||
|   T head; | ||||
|   typedef T Head; | ||||
| }; | ||||
|  | ||||
|  | ||||
| template<int N, typename... T> | ||||
| struct IndexTupleExtractor; | ||||
|  | ||||
| template<int N, typename T, typename... O> | ||||
| struct IndexTupleExtractor<N, T, O...> { | ||||
|  | ||||
|   typedef typename IndexTupleExtractor<N-1, O...>::ValType ValType; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC static constexpr ValType& get_val(IndexTuple<T, O...>& val) { | ||||
|     return IndexTupleExtractor<N-1, O...>::get_val(val.others); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC static constexpr const ValType& get_val(const IndexTuple<T, O...>& val) { | ||||
|     return IndexTupleExtractor<N-1, O...>::get_val(val.others); | ||||
|   } | ||||
|   template <typename V> | ||||
|   EIGEN_DEVICE_FUNC static void set_val(IndexTuple<T, O...>& val, V& new_val) { | ||||
|     IndexTupleExtractor<N-1, O...>::set_val(val.others, new_val); | ||||
|   } | ||||
|  | ||||
| }; | ||||
|  | ||||
| template<typename T, typename... O> | ||||
|   struct IndexTupleExtractor<0, T, O...> { | ||||
|  | ||||
|   typedef T ValType; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC static constexpr ValType& get_val(IndexTuple<T, O...>& val) { | ||||
|     return val.head; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC static constexpr const ValType& get_val(const IndexTuple<T, O...>& val) { | ||||
|     return val.head; | ||||
|   } | ||||
|   template <typename V> | ||||
|   EIGEN_DEVICE_FUNC static void set_val(IndexTuple<T, O...>& val, V& new_val) { | ||||
|     val.head = new_val; | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
|  | ||||
| template <int N, typename T, typename... O> | ||||
| EIGEN_DEVICE_FUNC constexpr typename IndexTupleExtractor<N, T, O...>::ValType& array_get(IndexTuple<T, O...>& tuple) { | ||||
|   return IndexTupleExtractor<N, T, O...>::get_val(tuple); | ||||
| } | ||||
| template <int N, typename T, typename... O> | ||||
| EIGEN_DEVICE_FUNC constexpr const typename IndexTupleExtractor<N, T, O...>::ValType& array_get(const IndexTuple<T, O...>& tuple) { | ||||
|   return IndexTupleExtractor<N, T, O...>::get_val(tuple); | ||||
| } | ||||
| template <typename T, typename... O> | ||||
|   struct array_size<IndexTuple<T, O...> > { | ||||
|   static const size_t value = IndexTuple<T, O...>::count; | ||||
| }; | ||||
| template <typename T, typename... O> | ||||
|   struct array_size<const IndexTuple<T, O...> > { | ||||
|   static const size_t value = IndexTuple<T, O...>::count; | ||||
| }; | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| template <DenseIndex Idx, typename ValueT> | ||||
| struct tuple_coeff { | ||||
|   template <typename... T> | ||||
|   EIGEN_DEVICE_FUNC static constexpr ValueT get(const DenseIndex i, const IndexTuple<T...>& t) { | ||||
|     //    return array_get<Idx>(t) * (i == Idx) + tuple_coeff<Idx-1>::get(i, t) * (i != Idx); | ||||
|     return (i == Idx ? array_get<Idx>(t) : tuple_coeff<Idx-1, ValueT>::get(i, t)); | ||||
|   } | ||||
|   template <typename... T> | ||||
|   EIGEN_DEVICE_FUNC static void set(const DenseIndex i, IndexTuple<T...>& t, const ValueT& value) { | ||||
|     if (i == Idx) { | ||||
|       update_value(array_get<Idx>(t), value); | ||||
|     } else { | ||||
|       tuple_coeff<Idx-1, ValueT>::set(i, t, value); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   template <typename... T> | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool value_known_statically(const DenseIndex i, const IndexTuple<T...>& t) { | ||||
|     return ((i == Idx) & is_compile_time_constant<typename IndexTupleExtractor<Idx, T...>::ValType>::value) || | ||||
|         tuple_coeff<Idx-1, ValueT>::value_known_statically(i, t); | ||||
|   } | ||||
|  | ||||
|   template <typename... T> | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool values_up_to_known_statically(const IndexTuple<T...>& t) { | ||||
|     return is_compile_time_constant<typename IndexTupleExtractor<Idx, T...>::ValType>::value && | ||||
|         tuple_coeff<Idx-1, ValueT>::values_up_to_known_statically(t); | ||||
|   } | ||||
|  | ||||
|   template <typename... T> | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool values_up_to_statically_known_to_increase(const IndexTuple<T...>& t) { | ||||
|     return is_compile_time_constant<typename IndexTupleExtractor<Idx, T...>::ValType>::value && | ||||
|            is_compile_time_constant<typename IndexTupleExtractor<Idx, T...>::ValType>::value && | ||||
|            array_get<Idx>(t) > array_get<Idx-1>(t) && | ||||
|            tuple_coeff<Idx-1, ValueT>::values_up_to_statically_known_to_increase(t); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename ValueT> | ||||
| struct tuple_coeff<0, ValueT> { | ||||
|   template <typename... T> | ||||
|   EIGEN_DEVICE_FUNC static constexpr ValueT get(const DenseIndex /*i*/, const IndexTuple<T...>& t) { | ||||
|     //  eigen_assert (i == 0);  // gcc fails to compile assertions in constexpr | ||||
|     return array_get<0>(t)/* * (i == 0)*/; | ||||
|   } | ||||
|   template <typename... T> | ||||
|   EIGEN_DEVICE_FUNC static void set(const DenseIndex i, IndexTuple<T...>& t, const ValueT value) { | ||||
|     eigen_assert (i == 0); | ||||
|     update_value(array_get<0>(t), value); | ||||
|   } | ||||
|   template <typename... T> | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool value_known_statically(const DenseIndex i, const IndexTuple<T...>&) { | ||||
|     return is_compile_time_constant<typename IndexTupleExtractor<0, T...>::ValType>::value & (i == 0); | ||||
|   } | ||||
|  | ||||
|   template <typename... T> | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool values_up_to_known_statically(const IndexTuple<T...>&) { | ||||
|     return is_compile_time_constant<typename IndexTupleExtractor<0, T...>::ValType>::value; | ||||
|   } | ||||
|  | ||||
|   template <typename... T> | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool values_up_to_statically_known_to_increase(const IndexTuple<T...>&) { | ||||
|     return true; | ||||
|   } | ||||
| }; | ||||
| }  // namespace internal | ||||
|  | ||||
|  | ||||
|  | ||||
| template<typename FirstType, typename... OtherTypes> | ||||
| struct IndexList : internal::IndexTuple<FirstType, OtherTypes...> { | ||||
|   EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr DenseIndex operator[] (const DenseIndex i) const { | ||||
|     return internal::tuple_coeff<internal::array_size<internal::IndexTuple<FirstType, OtherTypes...> >::value-1, DenseIndex>::get(i, *this); | ||||
|   } | ||||
|   EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr DenseIndex get(const DenseIndex i) const { | ||||
|     return internal::tuple_coeff<internal::array_size<internal::IndexTuple<FirstType, OtherTypes...> >::value-1, DenseIndex>::get(i, *this); | ||||
|   } | ||||
|   EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC void set(const DenseIndex i, const DenseIndex value) { | ||||
|     return internal::tuple_coeff<internal::array_size<internal::IndexTuple<FirstType, OtherTypes...> >::value-1, DenseIndex>::set(i, *this, value); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC constexpr IndexList(const internal::IndexTuple<FirstType, OtherTypes...>& other) : internal::IndexTuple<FirstType, OtherTypes...>(other) { } | ||||
|   EIGEN_DEVICE_FUNC constexpr IndexList(FirstType& first, OtherTypes... other) : internal::IndexTuple<FirstType, OtherTypes...>(first, other...) { } | ||||
|   EIGEN_DEVICE_FUNC constexpr IndexList() : internal::IndexTuple<FirstType, OtherTypes...>() { } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC constexpr bool value_known_statically(const DenseIndex i) const { | ||||
|     return internal::tuple_coeff<internal::array_size<internal::IndexTuple<FirstType, OtherTypes...> >::value-1, DenseIndex>::value_known_statically(i, *this); | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC constexpr bool all_values_known_statically() const { | ||||
|     return internal::tuple_coeff<internal::array_size<internal::IndexTuple<FirstType, OtherTypes...> >::value-1, DenseIndex>::values_up_to_known_statically(*this); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC constexpr bool values_statically_known_to_increase() const { | ||||
|     return internal::tuple_coeff<internal::array_size<internal::IndexTuple<FirstType, OtherTypes...> >::value-1, DenseIndex>::values_up_to_statically_known_to_increase(*this); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| template<typename FirstType, typename... OtherTypes> | ||||
| constexpr IndexList<FirstType, OtherTypes...> make_index_list(FirstType val1, OtherTypes... other_vals) { | ||||
|   return IndexList<FirstType, OtherTypes...>(val1, other_vals...); | ||||
| } | ||||
|  | ||||
|  | ||||
| template<typename FirstType, typename... OtherTypes> | ||||
| struct IndexPairList : internal::IndexTuple<FirstType, OtherTypes...> { | ||||
|   EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr IndexPair<DenseIndex> operator[] (const DenseIndex i) const { | ||||
|     return internal::tuple_coeff<internal::array_size<internal::IndexTuple<FirstType, OtherTypes...> >::value-1, IndexPair<DenseIndex>>::get(i, *this); | ||||
|   } | ||||
|   EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC void set(const DenseIndex i, const IndexPair<DenseIndex> value) { | ||||
|     return internal::tuple_coeff<internal::array_size<internal::IndexTuple<FirstType, OtherTypes...>>::value-1, IndexPair<DenseIndex> >::set(i, *this, value); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC  constexpr IndexPairList(const internal::IndexTuple<FirstType, OtherTypes...>& other) : internal::IndexTuple<FirstType, OtherTypes...>(other) { } | ||||
|   EIGEN_DEVICE_FUNC  constexpr IndexPairList() : internal::IndexTuple<FirstType, OtherTypes...>() { } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC constexpr bool value_known_statically(const DenseIndex i) const { | ||||
|     return internal::tuple_coeff<internal::array_size<internal::IndexTuple<FirstType, OtherTypes...> >::value-1, DenseIndex>::value_known_statically(i, *this); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| namespace internal { | ||||
|  | ||||
| template<typename FirstType, typename... OtherTypes> size_t array_prod(const IndexList<FirstType, OtherTypes...>& sizes) { | ||||
|   size_t result = 1; | ||||
|   for (int i = 0; i < array_size<IndexList<FirstType, OtherTypes...> >::value; ++i) { | ||||
|     result *= sizes[i]; | ||||
|   } | ||||
|   return result; | ||||
| } | ||||
|  | ||||
| template<typename FirstType, typename... OtherTypes> struct array_size<IndexList<FirstType, OtherTypes...> > { | ||||
|   static const size_t value = array_size<IndexTuple<FirstType, OtherTypes...> >::value; | ||||
| }; | ||||
| template<typename FirstType, typename... OtherTypes> struct array_size<const IndexList<FirstType, OtherTypes...> > { | ||||
|   static const size_t value = array_size<IndexTuple<FirstType, OtherTypes...> >::value; | ||||
| }; | ||||
|  | ||||
| template<typename FirstType, typename... OtherTypes> struct array_size<IndexPairList<FirstType, OtherTypes...> > { | ||||
|   static const size_t value = std::tuple_size<std::tuple<FirstType, OtherTypes...> >::value; | ||||
| }; | ||||
| template<typename FirstType, typename... OtherTypes> struct array_size<const IndexPairList<FirstType, OtherTypes...> > { | ||||
|   static const size_t value = std::tuple_size<std::tuple<FirstType, OtherTypes...> >::value; | ||||
| }; | ||||
|  | ||||
| template<DenseIndex N, typename FirstType, typename... OtherTypes> EIGEN_DEVICE_FUNC constexpr DenseIndex array_get(IndexList<FirstType, OtherTypes...>& a) { | ||||
|   return IndexTupleExtractor<N, FirstType, OtherTypes...>::get_val(a); | ||||
| } | ||||
| template<DenseIndex N, typename FirstType, typename... OtherTypes> EIGEN_DEVICE_FUNC constexpr DenseIndex array_get(const IndexList<FirstType, OtherTypes...>& a) { | ||||
|   return IndexTupleExtractor<N, FirstType, OtherTypes...>::get_val(a); | ||||
| } | ||||
|  | ||||
| template <typename T> | ||||
| struct index_known_statically_impl { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex) { | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename FirstType, typename... OtherTypes> | ||||
| struct index_known_statically_impl<IndexList<FirstType, OtherTypes...> > { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i) { | ||||
|     return IndexList<FirstType, OtherTypes...>().value_known_statically(i); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename FirstType, typename... OtherTypes> | ||||
| struct index_known_statically_impl<const IndexList<FirstType, OtherTypes...> > { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i) { | ||||
|     return IndexList<FirstType, OtherTypes...>().value_known_statically(i); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| template <typename T> | ||||
| struct all_indices_known_statically_impl { | ||||
|   static constexpr bool run() { | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename FirstType, typename... OtherTypes> | ||||
| struct all_indices_known_statically_impl<IndexList<FirstType, OtherTypes...> > { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run() { | ||||
|     return IndexList<FirstType, OtherTypes...>().all_values_known_statically(); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename FirstType, typename... OtherTypes> | ||||
| struct all_indices_known_statically_impl<const IndexList<FirstType, OtherTypes...> > { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run() { | ||||
|     return IndexList<FirstType, OtherTypes...>().all_values_known_statically(); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| template <typename T> | ||||
| struct indices_statically_known_to_increase_impl { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run() { | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename FirstType, typename... OtherTypes> | ||||
|   struct indices_statically_known_to_increase_impl<IndexList<FirstType, OtherTypes...> > { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run() { | ||||
|     return Eigen::IndexList<FirstType, OtherTypes...>().values_statically_known_to_increase(); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename FirstType, typename... OtherTypes> | ||||
|   struct indices_statically_known_to_increase_impl<const IndexList<FirstType, OtherTypes...> > { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run() { | ||||
|     return Eigen::IndexList<FirstType, OtherTypes...>().values_statically_known_to_increase(); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| template <typename Tx> | ||||
| struct index_statically_eq_impl { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename FirstType, typename... OtherTypes> | ||||
| struct index_statically_eq_impl<IndexList<FirstType, OtherTypes...> > { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { | ||||
|     return IndexList<FirstType, OtherTypes...>().value_known_statically(i) & | ||||
|         (IndexList<FirstType, OtherTypes...>().get(i) == value); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename FirstType, typename... OtherTypes> | ||||
| struct index_statically_eq_impl<const IndexList<FirstType, OtherTypes...> > { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { | ||||
|     return IndexList<FirstType, OtherTypes...>().value_known_statically(i) & | ||||
|         (IndexList<FirstType, OtherTypes...>().get(i) == value); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| template <typename T> | ||||
| struct index_statically_ne_impl { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename FirstType, typename... OtherTypes> | ||||
| struct index_statically_ne_impl<IndexList<FirstType, OtherTypes...> > { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { | ||||
|     return IndexList<FirstType, OtherTypes...>().value_known_statically(i) & | ||||
|         (IndexList<FirstType, OtherTypes...>().get(i) != value); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename FirstType, typename... OtherTypes> | ||||
| struct index_statically_ne_impl<const IndexList<FirstType, OtherTypes...> > { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { | ||||
|     return IndexList<FirstType, OtherTypes...>().value_known_statically(i) & | ||||
|         (IndexList<FirstType, OtherTypes...>().get(i) != value); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| template <typename T> | ||||
| struct index_statically_gt_impl { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename FirstType, typename... OtherTypes> | ||||
| struct index_statically_gt_impl<IndexList<FirstType, OtherTypes...> > { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { | ||||
|     return IndexList<FirstType, OtherTypes...>().value_known_statically(i) & | ||||
|         (IndexList<FirstType, OtherTypes...>().get(i) > value); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename FirstType, typename... OtherTypes> | ||||
| struct index_statically_gt_impl<const IndexList<FirstType, OtherTypes...> > { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { | ||||
|     return IndexList<FirstType, OtherTypes...>().value_known_statically(i) & | ||||
|         (IndexList<FirstType, OtherTypes...>().get(i) > value); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
|  | ||||
| template <typename T> | ||||
| struct index_statically_lt_impl { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename FirstType, typename... OtherTypes> | ||||
| struct index_statically_lt_impl<IndexList<FirstType, OtherTypes...> > { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { | ||||
|     return IndexList<FirstType, OtherTypes...>().value_known_statically(i) & | ||||
|         (IndexList<FirstType, OtherTypes...>().get(i) < value); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename FirstType, typename... OtherTypes> | ||||
| struct index_statically_lt_impl<const IndexList<FirstType, OtherTypes...> > { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { | ||||
|     return IndexList<FirstType, OtherTypes...>().value_known_statically(i) & | ||||
|         (IndexList<FirstType, OtherTypes...>().get(i) < value); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
|  | ||||
| template <typename Tx> | ||||
| struct index_pair_first_statically_eq_impl { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename FirstType, typename... OtherTypes> | ||||
| struct index_pair_first_statically_eq_impl<IndexPairList<FirstType, OtherTypes...> > { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { | ||||
|     return IndexPairList<FirstType, OtherTypes...>().value_known_statically(i) & | ||||
|         (IndexPairList<FirstType, OtherTypes...>().operator[](i).first == value); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename FirstType, typename... OtherTypes> | ||||
| struct index_pair_first_statically_eq_impl<const IndexPairList<FirstType, OtherTypes...> > { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { | ||||
|     return IndexPairList<FirstType, OtherTypes...>().value_known_statically(i) & | ||||
|         (IndexPairList<FirstType, OtherTypes...>().operator[](i).first == value); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
|  | ||||
| template <typename Tx> | ||||
| struct index_pair_second_statically_eq_impl { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename FirstType, typename... OtherTypes> | ||||
| struct index_pair_second_statically_eq_impl<IndexPairList<FirstType, OtherTypes...> > { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { | ||||
|     return IndexPairList<FirstType, OtherTypes...>().value_known_statically(i) & | ||||
|         (IndexPairList<FirstType, OtherTypes...>().operator[](i).second == value); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename FirstType, typename... OtherTypes> | ||||
| struct index_pair_second_statically_eq_impl<const IndexPairList<FirstType, OtherTypes...> > { | ||||
|   EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { | ||||
|     return IndexPairList<FirstType, OtherTypes...>().value_known_statically(i) & | ||||
|         (IndexPairList<FirstType, OtherTypes...>().operator[](i).second == value); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| }  // end namespace internal | ||||
| }  // end namespace Eigen | ||||
|  | ||||
| #else | ||||
|  | ||||
| namespace Eigen { | ||||
| namespace internal { | ||||
|  | ||||
| template <typename T> | ||||
| struct index_known_statically_impl { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex) { | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename T> | ||||
| struct all_indices_known_statically_impl { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() { | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename T> | ||||
| struct indices_statically_known_to_increase_impl { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() { | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename T> | ||||
| struct index_statically_eq_impl { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) { | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename T> | ||||
| struct index_statically_ne_impl { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) { | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename T> | ||||
| struct index_statically_gt_impl { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) { | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename T> | ||||
| struct index_statically_lt_impl { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) { | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename Tx> | ||||
| struct index_pair_first_statically_eq_impl { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) { | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename Tx> | ||||
| struct index_pair_second_statically_eq_impl { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) { | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
|  | ||||
| }  // end namespace internal | ||||
| }  // end namespace Eigen | ||||
|  | ||||
| #endif | ||||
|  | ||||
|  | ||||
| namespace Eigen { | ||||
| namespace internal { | ||||
| template <typename T> | ||||
| static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_known_statically(DenseIndex i) { | ||||
|   return index_known_statically_impl<T>::run(i); | ||||
| } | ||||
|  | ||||
| template <typename T> | ||||
| static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool all_indices_known_statically() { | ||||
|   return all_indices_known_statically_impl<T>::run(); | ||||
| } | ||||
|  | ||||
| template <typename T> | ||||
| static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool indices_statically_known_to_increase() { | ||||
|   return indices_statically_known_to_increase_impl<T>::run(); | ||||
| } | ||||
|  | ||||
| template <typename T> | ||||
| static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_eq(DenseIndex i, DenseIndex value) { | ||||
|   return index_statically_eq_impl<T>::run(i, value); | ||||
| } | ||||
|  | ||||
| template <typename T> | ||||
| static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_ne(DenseIndex i, DenseIndex value) { | ||||
|   return index_statically_ne_impl<T>::run(i, value); | ||||
| } | ||||
|  | ||||
| template <typename T> | ||||
| static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_gt(DenseIndex i, DenseIndex value) { | ||||
|   return index_statically_gt_impl<T>::run(i, value); | ||||
| } | ||||
|  | ||||
| template <typename T> | ||||
| static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_lt(DenseIndex i, DenseIndex value) { | ||||
|   return index_statically_lt_impl<T>::run(i, value); | ||||
| } | ||||
|  | ||||
| template <typename T> | ||||
| static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_pair_first_statically_eq(DenseIndex i, DenseIndex value) { | ||||
|   return index_pair_first_statically_eq_impl<T>::run(i, value); | ||||
| } | ||||
|  | ||||
| template <typename T> | ||||
| static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_pair_second_statically_eq(DenseIndex i, DenseIndex value) { | ||||
|   return index_pair_second_statically_eq_impl<T>::run(i, value); | ||||
| } | ||||
|  | ||||
| }  // end namespace internal | ||||
| }  // end namespace Eigen | ||||
|  | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H | ||||
							
								
								
									
229  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h  vendored  Normal file
							| @@ -0,0 +1,229 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2015 Ke Yang <yangke@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \class TensorInflation | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Tensor inflation class. | ||||
|   * | ||||
|   * Inflating a tensor by strides S expands dimension i from size d to | ||||
|   * (d - 1) * S[i] + 1: the original coefficients are spread apart along each | ||||
|   * dimension and the newly created positions ("holes") evaluate to zero. | ||||
|   * | ||||
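|   * A minimal sketch, assuming the usual TensorBase::inflate() entry point | ||||
|   * (the values below are arbitrary): | ||||
|   * \code | ||||
|   * Eigen::Tensor<float, 1> a(3); | ||||
|   * a.setValues({1.f, 2.f, 3.f}); | ||||
|   * Eigen::array<Eigen::DenseIndex, 1> strides{{2}}; | ||||
|   * Eigen::Tensor<float, 1> b = a.inflate(strides); | ||||
|   * // b has size (3 - 1) * 2 + 1 = 5 and holds {1, 0, 2, 0, 3} | ||||
|   * \endcode | ||||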
|   */ | ||||
| namespace internal { | ||||
| template<typename Strides, typename XprType> | ||||
| struct traits<TensorInflationOp<Strides, XprType> > : public traits<XprType> | ||||
| { | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef traits<XprType> XprTraits; | ||||
|   typedef typename XprTraits::StorageKind StorageKind; | ||||
|   typedef typename XprTraits::Index Index; | ||||
|   typedef typename XprType::Nested Nested; | ||||
|   typedef typename remove_reference<Nested>::type _Nested; | ||||
|   static const int NumDimensions = XprTraits::NumDimensions; | ||||
|   static const int Layout = XprTraits::Layout; | ||||
| }; | ||||
|  | ||||
| template<typename Strides, typename XprType> | ||||
| struct eval<TensorInflationOp<Strides, XprType>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorInflationOp<Strides, XprType>& type; | ||||
| }; | ||||
|  | ||||
| template<typename Strides, typename XprType> | ||||
| struct nested<TensorInflationOp<Strides, XprType>, 1, typename eval<TensorInflationOp<Strides, XprType> >::type> | ||||
| { | ||||
|   typedef TensorInflationOp<Strides, XprType> type; | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
| template<typename Strides, typename XprType> | ||||
| class TensorInflationOp : public TensorBase<TensorInflationOp<Strides, XprType>, ReadOnlyAccessors> | ||||
| { | ||||
|   public: | ||||
|   typedef typename Eigen::internal::traits<TensorInflationOp>::Scalar Scalar; | ||||
|   typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename Eigen::internal::nested<TensorInflationOp>::type Nested; | ||||
|   typedef typename Eigen::internal::traits<TensorInflationOp>::StorageKind StorageKind; | ||||
|   typedef typename Eigen::internal::traits<TensorInflationOp>::Index Index; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorInflationOp(const XprType& expr, const Strides& strides) | ||||
|       : m_xpr(expr), m_strides(strides) {} | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const Strides& strides() const { return m_strides; } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const typename internal::remove_all<typename XprType::Nested>::type& | ||||
|     expression() const { return m_xpr; } | ||||
|  | ||||
|   protected: | ||||
|     typename XprType::Nested m_xpr; | ||||
|     const Strides m_strides; | ||||
| }; | ||||
|  | ||||
| // Eval as rvalue | ||||
| template<typename Strides, typename ArgType, typename Device> | ||||
| struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device> | ||||
| { | ||||
|   typedef TensorInflationOp<Strides, ArgType> XprType; | ||||
|   typedef typename XprType::Index Index; | ||||
|   static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; | ||||
|   typedef DSizes<Index, NumDims> Dimensions; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false, | ||||
|     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, | ||||
|     BlockAccess = false, | ||||
|     Layout = TensorEvaluator<ArgType, Device>::Layout, | ||||
|     CoordAccess = false,  // to be implemented | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) | ||||
|       : m_impl(op.expression(), device), m_strides(op.strides()) | ||||
|   { | ||||
|     m_dimensions = m_impl.dimensions(); | ||||
|     // Expand each dimension to the inflated dimension. | ||||
|     for (int i = 0; i < NumDims; ++i) { | ||||
|       m_dimensions[i] = (m_dimensions[i] - 1) * op.strides()[i] + 1; | ||||
|     } | ||||
|  | ||||
|     // Remember the strides for fast division. | ||||
|     for (int i = 0; i < NumDims; ++i) { | ||||
|       m_fastStrides[i] = internal::TensorIntDivisor<Index>(m_strides[i]); | ||||
|     } | ||||
|  | ||||
|     const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       m_outputStrides[0] = 1; | ||||
|       m_inputStrides[0] = 1; | ||||
|       for (int i = 1; i < NumDims; ++i) { | ||||
|         m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; | ||||
|         m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; | ||||
|       } | ||||
|     } else {  // RowMajor | ||||
|       m_outputStrides[NumDims-1] = 1; | ||||
|       m_inputStrides[NumDims-1] = 1; | ||||
|       for (int i = NumDims - 2; i >= 0; --i) { | ||||
|         m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1]; | ||||
|         m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { | ||||
|     m_impl.evalSubExprsIfNeeded(NULL); | ||||
|     return true; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { | ||||
|     m_impl.cleanup(); | ||||
|   } | ||||
|  | ||||
|   // Computes the input index given the output index. Returns true if the output | ||||
|   // index doesn't fall into a hole. | ||||
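|   // For example, with NumDims == 1 and a stride of 3, output indices 0, 3, 6, ... | ||||
|   // map to input indices 0, 1, 2, ..., while every other output index falls | ||||
|   // into a hole and coeff() returns zero for it. | ||||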
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool getInputIndex(Index index, Index* inputIndex) const | ||||
|   { | ||||
|     eigen_assert(index < dimensions().TotalSize()); | ||||
|     *inputIndex = 0; | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       for (int i = NumDims - 1; i > 0; --i) { | ||||
|         const Index idx = index / m_outputStrides[i]; | ||||
|         if (idx != idx / m_fastStrides[i] * m_strides[i]) { | ||||
|           return false; | ||||
|         } | ||||
|         *inputIndex += idx / m_strides[i] * m_inputStrides[i]; | ||||
|         index -= idx * m_outputStrides[i]; | ||||
|       } | ||||
|       if (index != index / m_fastStrides[0] * m_strides[0]) { | ||||
|         return false; | ||||
|       } | ||||
|       *inputIndex += index / m_strides[0]; | ||||
|       return true; | ||||
|     } else { | ||||
|       for (int i = 0; i < NumDims - 1; ++i) { | ||||
|         const Index idx = index / m_outputStrides[i]; | ||||
|         if (idx != idx / m_fastStrides[i] * m_strides[i]) { | ||||
|           return false; | ||||
|         } | ||||
|         *inputIndex += idx / m_strides[i] * m_inputStrides[i]; | ||||
|         index -= idx * m_outputStrides[i]; | ||||
|       } | ||||
|       if (index != index / m_fastStrides[NumDims-1] * m_strides[NumDims-1]) { | ||||
|         return false; | ||||
|       } | ||||
|       *inputIndex += index / m_strides[NumDims - 1]; | ||||
|     } | ||||
|     return true; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const | ||||
|   { | ||||
|     Index inputIndex = 0; | ||||
|     if (getInputIndex(index, &inputIndex)) { | ||||
|      return m_impl.coeff(inputIndex); | ||||
|     } else { | ||||
|      return Scalar(0); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   // TODO(yangke): optimize this function so that we can detect and produce | ||||
|   // all-zero packets | ||||
|   template<int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const | ||||
|   { | ||||
|     EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); | ||||
|  | ||||
|     EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; | ||||
|     for (int i = 0; i < PacketSize; ++i) { | ||||
|       values[i] = coeff(index+i); | ||||
|     } | ||||
|     PacketReturnType rslt = internal::pload<PacketReturnType>(values); | ||||
|     return rslt; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { | ||||
|     const double compute_cost = NumDims * (3 * TensorOpCost::DivCost<Index>() + | ||||
|                                            3 * TensorOpCost::MulCost<Index>() + | ||||
|                                            2 * TensorOpCost::AddCost<Index>()); | ||||
|     const double input_size = m_impl.dimensions().TotalSize(); | ||||
|     const double output_size = m_dimensions.TotalSize(); | ||||
|     if (output_size == 0) | ||||
|       return TensorOpCost(); | ||||
|     return m_impl.costPerCoeff(vectorized) + | ||||
|            TensorOpCost(sizeof(CoeffReturnType) * input_size / output_size, 0, | ||||
|                         compute_cost, vectorized, PacketSize); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } | ||||
|  | ||||
|  protected: | ||||
|   Dimensions m_dimensions; | ||||
|   array<Index, NumDims> m_outputStrides; | ||||
|   array<Index, NumDims> m_inputStrides; | ||||
|   TensorEvaluator<ArgType, Device> m_impl; | ||||
|   const Strides m_strides; | ||||
|   array<internal::TensorIntDivisor<Index>, NumDims> m_fastStrides; | ||||
| }; | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H | ||||
							
								
								
									
82  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h  vendored  Normal file
							| @@ -0,0 +1,82 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_INITIALIZER_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_INITIALIZER_H | ||||
|  | ||||
| #if EIGEN_HAS_VARIADIC_TEMPLATES | ||||
|  | ||||
| #include <initializer_list> | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \class TensorInitializer | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Helper template to initialize Tensors from std::initializer_lists. | ||||
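|   * | ||||
|   * This is the machinery that typically backs brace-style initialization such | ||||
|   * as Tensor::setValues(); a minimal sketch (the values are arbitrary): | ||||
|   * \code | ||||
|   * Eigen::Tensor<int, 2> t(2, 3); | ||||
|   * t.setValues({{1, 2, 3}, {4, 5, 6}}); | ||||
|   * \endcode | ||||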
|   */ | ||||
| namespace internal { | ||||
|  | ||||
| template <typename Derived, int N> | ||||
| struct Initializer { | ||||
|   typedef std::initializer_list< | ||||
|     typename Initializer<Derived, N - 1>::InitList> InitList; | ||||
|  | ||||
|   static void run(TensorEvaluator<Derived, DefaultDevice>& tensor, | ||||
|                   Eigen::array<typename traits<Derived>::Index, traits<Derived>::NumDimensions>* indices, | ||||
|                   const InitList& vals) { | ||||
|     int i = 0; | ||||
|     for (auto v : vals) { | ||||
|       (*indices)[traits<Derived>::NumDimensions - N] = i++; | ||||
|       Initializer<Derived, N - 1>::run(tensor, indices, v); | ||||
|     } | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename Derived> | ||||
| struct Initializer<Derived, 1> { | ||||
|   typedef std::initializer_list<typename traits<Derived>::Scalar> InitList; | ||||
|  | ||||
|   static void run(TensorEvaluator<Derived, DefaultDevice>& tensor, | ||||
|                   Eigen::array<typename traits<Derived>::Index, traits<Derived>::NumDimensions>* indices, | ||||
|                   const InitList& vals) { | ||||
|     int i = 0; | ||||
|     // There is likely a faster way to do that than iterating. | ||||
|     for (auto v : vals) { | ||||
|       (*indices)[traits<Derived>::NumDimensions - 1] = i++; | ||||
|       tensor.coeffRef(*indices) = v; | ||||
|     } | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename Derived> | ||||
| struct Initializer<Derived, 0> { | ||||
|   typedef typename traits<Derived>::Scalar InitList; | ||||
|  | ||||
|   static void run(TensorEvaluator<Derived, DefaultDevice>& tensor, | ||||
|                   Eigen::array<typename traits<Derived>::Index, traits<Derived>::NumDimensions>*, | ||||
|                   const InitList& v) { | ||||
|     tensor.coeffRef(0) = v; | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| template <typename Derived, int N> | ||||
| void initialize_tensor(TensorEvaluator<Derived, DefaultDevice>& tensor, | ||||
|                        const typename Initializer<Derived, traits<Derived>::NumDimensions>::InitList& vals) { | ||||
|   Eigen::array<typename traits<Derived>::Index, traits<Derived>::NumDimensions> indices; | ||||
|   Initializer<Derived, traits<Derived>::NumDimensions>::run(tensor, &indices, vals); | ||||
| } | ||||
|  | ||||
| }  // namespace internal | ||||
| }  // namespace Eigen | ||||
|  | ||||
| #endif  // EIGEN_HAS_VARIADIC_TEMPLATES | ||||
|  | ||||
| #endif  // EIGEN_CXX11_TENSOR_TENSOR_INITIALIZER_H | ||||
							
								
								
									
253  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h  vendored  Normal file
							| @@ -0,0 +1,253 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H | ||||
|  | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \internal | ||||
|   * | ||||
|   * \class TensorIntDiv | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Fast integer division by a constant. | ||||
|   * | ||||
|   * See the paper from Granlund and Montgomery for explanation. | ||||
|   *   (at http://dx.doi.org/10.1145/773473.178249) | ||||
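|   * | ||||
|   * The divider is preprocessed once so that each subsequent division costs a | ||||
|   * high multiply plus a few additions and shifts instead of a hardware divide. | ||||
|   * A minimal sketch (the numbers are arbitrary): | ||||
|   * \code | ||||
|   * Eigen::internal::TensorIntDivisor<int> fast_div(7);  // precompute for divisor 7 | ||||
|   * int quotient = fast_div.divide(100);                 // 14, same as 100 / 7 | ||||
|   * \endcode | ||||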
|   * | ||||
|   * \sa Tensor | ||||
|   */ | ||||
|  | ||||
| namespace internal { | ||||
|  | ||||
| namespace { | ||||
|  | ||||
|   // Note: result is undefined if val == 0 | ||||
|   template <typename T> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE | ||||
|   typename internal::enable_if<sizeof(T)==4,int>::type count_leading_zeros(const T val) | ||||
|   { | ||||
| #ifdef __CUDA_ARCH__ | ||||
|     return __clz(val); | ||||
| #elif EIGEN_COMP_MSVC | ||||
|     unsigned long index; | ||||
|     _BitScanReverse(&index, val); | ||||
|     return 31 - index; | ||||
| #else | ||||
|     EIGEN_STATIC_ASSERT(sizeof(unsigned long long) == 8, YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|     return __builtin_clz(static_cast<uint32_t>(val)); | ||||
| #endif | ||||
|   } | ||||
|  | ||||
|   template <typename T> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE | ||||
|   typename internal::enable_if<sizeof(T)==8,int>::type count_leading_zeros(const T val) | ||||
|   { | ||||
| #ifdef __CUDA_ARCH__ | ||||
|     return __clzll(val); | ||||
| #elif EIGEN_COMP_MSVC && EIGEN_ARCH_x86_64 | ||||
|     unsigned long index; | ||||
|     _BitScanReverse64(&index, val); | ||||
|     return 63 - index; | ||||
| #elif EIGEN_COMP_MSVC | ||||
|     // MSVC's _BitScanReverse64 is not available for 32-bit builds. | ||||
|     unsigned int lo = (unsigned int)(val&0xffffffff); | ||||
|     unsigned int hi = (unsigned int)((val>>32)&0xffffffff); | ||||
|     int n; | ||||
|     if(hi==0) | ||||
|       n = 32 + count_leading_zeros<unsigned int>(lo); | ||||
|     else | ||||
|       n = count_leading_zeros<unsigned int>(hi); | ||||
|     return n; | ||||
| #else | ||||
|     EIGEN_STATIC_ASSERT(sizeof(unsigned long long) == 8, YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|     return __builtin_clzll(static_cast<uint64_t>(val)); | ||||
| #endif | ||||
|   } | ||||
|  | ||||
|   template <typename T> | ||||
|   struct UnsignedTraits { | ||||
|     typedef typename conditional<sizeof(T) == 8, uint64_t, uint32_t>::type type; | ||||
|   }; | ||||
|  | ||||
|   template <typename T> | ||||
|   struct DividerTraits { | ||||
|     typedef typename UnsignedTraits<T>::type type; | ||||
|     static const int N = sizeof(T) * 8; | ||||
|   }; | ||||
|  | ||||
|   template <typename T> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint32_t muluh(const uint32_t a, const T b) { | ||||
| #if defined(__CUDA_ARCH__) | ||||
|     return __umulhi(a, b); | ||||
| #else | ||||
|     return (static_cast<uint64_t>(a) * b) >> 32; | ||||
| #endif | ||||
|   } | ||||
|  | ||||
|   template <typename T> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t muluh(const uint64_t a, const T b) { | ||||
| #if defined(__CUDA_ARCH__) | ||||
|     return __umul64hi(a, b); | ||||
| #elif defined(__SIZEOF_INT128__) | ||||
|     __uint128_t v = static_cast<__uint128_t>(a) * static_cast<__uint128_t>(b); | ||||
|     return static_cast<uint64_t>(v >> 64); | ||||
| #else | ||||
|     return (TensorUInt128<static_val<0>, uint64_t>(a) * TensorUInt128<static_val<0>, uint64_t>(b)).upper(); | ||||
| #endif | ||||
|   } | ||||
|  | ||||
|   template <int N, typename T> | ||||
|   struct DividerHelper { | ||||
|     static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint32_t computeMultiplier(const int log_div, const T divider) { | ||||
|       EIGEN_STATIC_ASSERT(N == 32, YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|       return static_cast<uint32_t>((static_cast<uint64_t>(1) << (N+log_div)) / divider - (static_cast<uint64_t>(1) << N) + 1); | ||||
|     } | ||||
|   }; | ||||
|  | ||||
|   template <typename T> | ||||
|   struct DividerHelper<64, T> { | ||||
|     static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t computeMultiplier(const int log_div, const T divider) { | ||||
| #if defined(__SIZEOF_INT128__) && !defined(__CUDA_ARCH__) | ||||
|       return static_cast<uint64_t>((static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1); | ||||
| #else | ||||
|       const uint64_t shift = 1ULL << log_div; | ||||
|       TensorUInt128<uint64_t, uint64_t> result = TensorUInt128<uint64_t, static_val<0> >(shift, 0) / TensorUInt128<static_val<0>, uint64_t>(divider) | ||||
|                                                - TensorUInt128<static_val<1>, static_val<0> >(1, 0) | ||||
|                                                + TensorUInt128<static_val<0>, static_val<1> >(1); | ||||
|       return static_cast<uint64_t>(result); | ||||
| #endif | ||||
|     } | ||||
|   }; | ||||
| } | ||||
|  | ||||
|  | ||||
| template <typename T, bool div_gt_one = false> | ||||
| struct TensorIntDivisor { | ||||
|  public: | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() { | ||||
|     multiplier = 0; | ||||
|     shift1 = 0; | ||||
|     shift2 = 0; | ||||
|   } | ||||
|  | ||||
|   // Must have 0 < divider < 2^31. This is relaxed to | ||||
|   // 0 < divider < 2^63 when using 64-bit indices on platforms that support | ||||
|   // the __uint128_t type. | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor(const T divider) { | ||||
|     const int N = DividerTraits<T>::N; | ||||
|     eigen_assert(static_cast<typename UnsignedTraits<T>::type>(divider) < NumTraits<UnsignedType>::highest()/2); | ||||
|     eigen_assert(divider > 0); | ||||
|  | ||||
|     // fast log2 (via the leading zero count) | ||||
|     const int leading_zeros = count_leading_zeros(static_cast<UnsignedType>(divider)); | ||||
|     int log_div = N - leading_zeros; | ||||
|     // if divider is a power of two then log_div is 1 more than it should be. | ||||
|     if ((static_cast<typename UnsignedTraits<T>::type>(1) << (log_div-1)) == static_cast<typename UnsignedTraits<T>::type>(divider)) | ||||
|       log_div--; | ||||
|  | ||||
|     multiplier = DividerHelper<N, T>::computeMultiplier(log_div, divider); | ||||
|     shift1 = log_div > 1 ? 1 : log_div; | ||||
|     shift2 = log_div > 1 ? log_div-1 : 0; | ||||
|   } | ||||
|  | ||||
|   // Must have 0 <= numerator. On platforms that don't support the __uint128_t | ||||
|   // type, the numerator should also be less than 2^32-1. | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T divide(const T numerator) const { | ||||
|     eigen_assert(static_cast<typename UnsignedTraits<T>::type>(numerator) < NumTraits<UnsignedType>::highest()/2); | ||||
|     //eigen_assert(numerator >= 0); // this is implicitly asserted by the line above | ||||
|  | ||||
|     UnsignedType t1 = muluh(multiplier, numerator); | ||||
|     UnsignedType t = (static_cast<UnsignedType>(numerator) - t1) >> shift1; | ||||
|     return (t1 + t) >> shift2; | ||||
|   } | ||||
|  | ||||
|  private: | ||||
|   typedef typename DividerTraits<T>::type UnsignedType; | ||||
|   UnsignedType multiplier; | ||||
|   int32_t shift1; | ||||
|   int32_t shift2; | ||||
| }; | ||||
|  | ||||
|  | ||||
| // Optimized version for signed 32-bit integers. | ||||
| // Derived from Hacker's Delight. | ||||
| // Only works for divisors strictly greater than one. | ||||
| template <> | ||||
| class TensorIntDivisor<int32_t, true> { | ||||
|  public: | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() { | ||||
|     magic = 0; | ||||
|     shift = 0; | ||||
|   } | ||||
|   // Must have 2 <= divider | ||||
|   EIGEN_DEVICE_FUNC TensorIntDivisor(int32_t divider)  { | ||||
|     eigen_assert(divider >= 2); | ||||
|     calcMagic(divider); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int divide(const int32_t n) const { | ||||
| #ifdef __CUDA_ARCH__ | ||||
|     return (__umulhi(magic, n) >> shift); | ||||
| #else | ||||
|     uint64_t v = static_cast<uint64_t>(magic) * static_cast<uint64_t>(n); | ||||
|     return (static_cast<uint32_t>(v >> 32) >> shift); | ||||
| #endif | ||||
|   } | ||||
|  | ||||
| private: | ||||
|   // Compute the magic numbers. See Hacker's Delight, section 10, for an | ||||
|   // in-depth explanation. | ||||
|   EIGEN_DEVICE_FUNC void calcMagic(int32_t d) { | ||||
|    const unsigned two31 = 0x80000000;     // 2**31. | ||||
|    unsigned ad = d; | ||||
|    unsigned t = two31 + (ad >> 31); | ||||
|    unsigned anc = t - 1 - t%ad;     // Absolute value of nc. | ||||
|    int p = 31;                      // Init. p. | ||||
|    unsigned q1 = two31/anc;         // Init. q1 = 2**p/|nc|. | ||||
|    unsigned r1 = two31 - q1*anc;    // Init. r1 = rem(2**p, |nc|). | ||||
|    unsigned q2 = two31/ad;          // Init. q2 = 2**p/|d|. | ||||
|    unsigned r2 = two31 - q2*ad;     // Init. r2 = rem(2**p, |d|). | ||||
|    unsigned delta = 0; | ||||
|    do { | ||||
|       p = p + 1; | ||||
|       q1 = 2*q1;           // Update q1 = 2**p/|nc|. | ||||
|       r1 = 2*r1;           // Update r1 = rem(2**p, |nc|). | ||||
|       if (r1 >= anc) {     // (Must be an unsigned | ||||
|          q1 = q1 + 1;      // comparison here). | ||||
|          r1 = r1 - anc;} | ||||
|       q2 = 2*q2;           // Update q2 = 2**p/|d|. | ||||
|       r2 = 2*r2;           // Update r2 = rem(2**p, |d|). | ||||
|       if (r2 >= ad) {      // (Must be an unsigned | ||||
|          q2 = q2 + 1;      // comparison here). | ||||
|          r2 = r2 - ad;} | ||||
|       delta = ad - r2; | ||||
|    } while (q1 < delta || (q1 == delta && r1 == 0)); | ||||
|  | ||||
|    magic = (unsigned)(q2 + 1); | ||||
|    shift = p - 32; | ||||
|   } | ||||
|  | ||||
|   uint32_t magic; | ||||
|   int32_t shift; | ||||
| }; | ||||
|  | ||||
|  | ||||
| template <typename T, bool div_gt_one> | ||||
| static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T, div_gt_one>& divisor) { | ||||
|   return divisor.divide(numerator); | ||||
| } | ||||
|  | ||||
|  | ||||
| } // end namespace internal | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H | ||||
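For reference, the generic TensorIntDivisor above turns division by a runtime constant into one multiply-high plus two shifts, and the int32_t specialization uses the Hacker's Delight magic-number variant; the expensive part (computing the multiplier/shift pair or the magic number) happens once in the constructor. A minimal usage sketch, assuming the vendored eigen3 directory is on the include path so that <unsupported/Eigen/CXX11/Tensor> resolves; the comparison against ordinary division is purely illustrative:

#include <unsupported/Eigen/CXX11/Tensor>
#include <cassert>

int main() {
  // Precompute the multiplier/shift pair once for a runtime divisor...
  const int divisor = 7;
  Eigen::internal::TensorIntDivisor<int> fast_div(divisor);

  // ...then each use is a multiply-high plus shifts; operator/ forwards to divide().
  for (int n = 0; n < 1000; ++n) {
    assert(n / fast_div == n / divisor);
  }

  // The signed 32-bit specialization (div_gt_one = true) requires divisor >= 2
  // and uses the magic number computed in calcMagic().
  Eigen::internal::TensorIntDivisor<int32_t, true> magic_div(divisor);
  for (int32_t n = 0; n < 1000; ++n) {
    assert(magic_div.divide(n) == n / divisor);
  }
  return 0;
}
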
							
								
								
									
209 external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h vendored Normal file
| @@ -0,0 +1,209 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_LAYOUT_SWAP_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_LAYOUT_SWAP_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \class TensorLayoutSwap | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Swap the layout from col-major to row-major, or row-major | ||||
|   * to col-major, and invert the order of the dimensions. | ||||
|   * | ||||
|   * Beware: the dimensions are reversed by this operation. If you want to | ||||
|   * preserve the ordering of the dimensions, you need to combine this | ||||
|   * operation with a shuffle. | ||||
|   * | ||||
|   * Example: | ||||
|   * Tensor<float, 2, ColMajor> input(2, 4); | ||||
|   * Tensor<float, 2, RowMajor> output = input.swap_layout(); | ||||
|   * eigen_assert(output.dimension(0) == 4); | ||||
|   * eigen_assert(output.dimension(1) == 2); | ||||
|   * | ||||
|   * array<int, 2> shuffle(1, 0); | ||||
|   * output = input.swap_layout().shuffle(shuffle); | ||||
|   * eigen_assert(output.dimension(0) == 2); | ||||
|   * eigen_assert(output.dimension(1) == 4); | ||||
|   * | ||||
|   */ | ||||
| namespace internal { | ||||
| template<typename XprType> | ||||
| struct traits<TensorLayoutSwapOp<XprType> > : public traits<XprType> | ||||
| { | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef traits<XprType> XprTraits; | ||||
|   typedef typename XprTraits::StorageKind StorageKind; | ||||
|   typedef typename XprTraits::Index Index; | ||||
|   typedef typename XprType::Nested Nested; | ||||
|   typedef typename remove_reference<Nested>::type _Nested; | ||||
|   static const int NumDimensions = traits<XprType>::NumDimensions; | ||||
|   static const int Layout = (traits<XprType>::Layout == ColMajor) ? RowMajor : ColMajor; | ||||
| }; | ||||
|  | ||||
| template<typename XprType> | ||||
| struct eval<TensorLayoutSwapOp<XprType>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorLayoutSwapOp<XprType>& type; | ||||
| }; | ||||
|  | ||||
| template<typename XprType> | ||||
| struct nested<TensorLayoutSwapOp<XprType>, 1, typename eval<TensorLayoutSwapOp<XprType> >::type> | ||||
| { | ||||
|   typedef TensorLayoutSwapOp<XprType> type; | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
|  | ||||
|  | ||||
| template<typename XprType> | ||||
| class TensorLayoutSwapOp : public TensorBase<TensorLayoutSwapOp<XprType>, WriteAccessors> | ||||
| { | ||||
|   public: | ||||
|   typedef typename Eigen::internal::traits<TensorLayoutSwapOp>::Scalar Scalar; | ||||
|   typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; | ||||
|   typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; | ||||
|   typedef typename Eigen::internal::nested<TensorLayoutSwapOp>::type Nested; | ||||
|   typedef typename Eigen::internal::traits<TensorLayoutSwapOp>::StorageKind StorageKind; | ||||
|   typedef typename Eigen::internal::traits<TensorLayoutSwapOp>::Index Index; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorLayoutSwapOp(const XprType& expr) | ||||
|       : m_xpr(expr) {} | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const typename internal::remove_all<typename XprType::Nested>::type& | ||||
|     expression() const { return m_xpr; } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE TensorLayoutSwapOp& operator = (const TensorLayoutSwapOp& other) | ||||
|     { | ||||
|       typedef TensorAssignOp<TensorLayoutSwapOp, const TensorLayoutSwapOp> Assign; | ||||
|       Assign assign(*this, other); | ||||
|       internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); | ||||
|       return *this; | ||||
|     } | ||||
|  | ||||
|     template<typename OtherDerived> | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE TensorLayoutSwapOp& operator = (const OtherDerived& other) | ||||
|     { | ||||
|       typedef TensorAssignOp<TensorLayoutSwapOp, const OtherDerived> Assign; | ||||
|       Assign assign(*this, other); | ||||
|       internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); | ||||
|       return *this; | ||||
|     } | ||||
|  | ||||
|   protected: | ||||
|     typename XprType::Nested m_xpr; | ||||
| }; | ||||
|  | ||||
|  | ||||
| // Eval as rvalue | ||||
| template<typename ArgType, typename Device> | ||||
| struct TensorEvaluator<const TensorLayoutSwapOp<ArgType>, Device> | ||||
| { | ||||
|   typedef TensorLayoutSwapOp<ArgType> XprType; | ||||
|   typedef typename XprType::Index Index; | ||||
|   static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; | ||||
|   typedef DSizes<Index, NumDims> Dimensions; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, | ||||
|     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, | ||||
|     Layout = (static_cast<int>(TensorEvaluator<ArgType, Device>::Layout) == static_cast<int>(ColMajor)) ? RowMajor : ColMajor, | ||||
|     CoordAccess = false,  // to be implemented | ||||
|     RawAccess = TensorEvaluator<ArgType, Device>::RawAccess | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) | ||||
|       : m_impl(op.expression(), device) | ||||
|   { | ||||
|     for(int i = 0; i < NumDims; ++i) { | ||||
|       m_dimensions[i] = m_impl.dimensions()[NumDims-1-i]; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { | ||||
|     return m_impl.evalSubExprsIfNeeded(data); | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { | ||||
|     m_impl.cleanup(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const | ||||
|   { | ||||
|     return m_impl.coeff(index); | ||||
|   } | ||||
|  | ||||
|   template<int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const | ||||
|   { | ||||
|     return m_impl.template packet<LoadMode>(index); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { | ||||
|     return m_impl.costPerCoeff(vectorized); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC Scalar* data() const { return m_impl.data(); } | ||||
|  | ||||
|   const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; } | ||||
|  | ||||
|  protected: | ||||
|   TensorEvaluator<ArgType, Device> m_impl; | ||||
|   Dimensions m_dimensions; | ||||
| }; | ||||
|  | ||||
|  | ||||
| // Eval as lvalue | ||||
| template<typename ArgType, typename Device> | ||||
|   struct TensorEvaluator<TensorLayoutSwapOp<ArgType>, Device> | ||||
|   : public TensorEvaluator<const TensorLayoutSwapOp<ArgType>, Device> | ||||
| { | ||||
|   typedef TensorEvaluator<const TensorLayoutSwapOp<ArgType>, Device> Base; | ||||
|   typedef TensorLayoutSwapOp<ArgType> XprType; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, | ||||
|     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, | ||||
|     Layout = (static_cast<int>(TensorEvaluator<ArgType, Device>::Layout) == static_cast<int>(ColMajor)) ? RowMajor : ColMajor, | ||||
|     CoordAccess = false  // to be implemented | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) | ||||
|     : Base(op, device) | ||||
|   { } | ||||
|  | ||||
|   typedef typename XprType::Index Index; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) | ||||
|   { | ||||
|     return this->m_impl.coeffRef(index); | ||||
|   } | ||||
|   template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   void writePacket(Index index, const PacketReturnType& x) | ||||
|   { | ||||
|     this->m_impl.template writePacket<StoreMode>(index, x); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_LAYOUT_SWAP_H | ||||
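The class comment above already sketches this; here is a self-contained version of that example (same include-path assumption as before), showing that swap_layout() alone reverses the dimensions, while chaining a shuffle restores the original ordering:

#include <unsupported/Eigen/CXX11/Tensor>
#include <cassert>

int main() {
  Eigen::Tensor<float, 2, Eigen::ColMajor> input(2, 4);
  input.setRandom();

  // swap_layout() flips the storage order and reverses the dimensions.
  Eigen::Tensor<float, 2, Eigen::RowMajor> swapped = input.swap_layout();
  assert(swapped.dimension(0) == 4 && swapped.dimension(1) == 2);

  // Combining it with a shuffle restores the original dimension order.
  Eigen::array<int, 2> shuffle{{1, 0}};
  Eigen::Tensor<float, 2, Eigen::RowMajor> output = input.swap_layout().shuffle(shuffle);
  assert(output.dimension(0) == 2 && output.dimension(1) == 4);
  assert(output(1, 3) == input(1, 3));  // same logical element, new storage order
  return 0;
}
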
							
								
								
									
54 external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h vendored Normal file
| @@ -0,0 +1,54 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_META_MACROS_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_META_MACROS_H | ||||
|  | ||||
|  | ||||
| /** Use this macro for SFINAE-based selection in templated functions | ||||
|  * | ||||
|  *   template<typename T, | ||||
|  *            typename std::enable_if< isBanana<T>::value , int >::type = 0 | ||||
|  *   > | ||||
|  *   void foo(){} | ||||
|  * | ||||
|  *   becomes => | ||||
|  * | ||||
|  *   template<typename T, | ||||
|  *           EIGEN_SFINAE_ENABLE_IF( isBanana<T>::value ) | ||||
|  *   > | ||||
|  *   void foo(){} | ||||
|  */ | ||||
|  | ||||
| // SFINAE requires variadic templates | ||||
| #ifndef __CUDACC__ | ||||
| #if EIGEN_HAS_VARIADIC_TEMPLATES | ||||
|   // SFINAE doesn't work for gcc <= 4.7 | ||||
|   #ifdef EIGEN_COMP_GNUC | ||||
|     #if EIGEN_GNUC_AT_LEAST(4,8) | ||||
|       #define EIGEN_HAS_SFINAE | ||||
|     #endif | ||||
|   #else | ||||
|     #define EIGEN_HAS_SFINAE | ||||
|   #endif | ||||
| #endif | ||||
| #endif | ||||
|  | ||||
| #define EIGEN_SFINAE_ENABLE_IF( __condition__ ) \ | ||||
|     typename internal::enable_if< ( __condition__ ) , int >::type = 0 | ||||
|  | ||||
|  | ||||
| #if EIGEN_HAS_CONSTEXPR | ||||
| #define EIGEN_CONSTEXPR constexpr | ||||
| #else | ||||
| #define EIGEN_CONSTEXPR | ||||
| #endif | ||||
|  | ||||
|  | ||||
| #endif | ||||
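A sketch of how EIGEN_SFINAE_ENABLE_IF is meant to pick between overloads. The `describe` functions are hypothetical; note that the macro expands to an unqualified internal::enable_if, so it has to be used where Eigen::internal is visible (here via a using-directive), and only on compilers for which the header defines EIGEN_HAS_SFINAE:

#include <unsupported/Eigen/CXX11/Tensor>
#include <type_traits>
#include <iostream>

using namespace Eigen;  // lets the unqualified internal:: inside the macro resolve

#ifdef EIGEN_HAS_SFINAE
// Only the overload whose condition holds survives substitution.
template <typename T, EIGEN_SFINAE_ENABLE_IF(std::is_integral<T>::value)>
void describe(T) { std::cout << "integral index type\n"; }

template <typename T, EIGEN_SFINAE_ENABLE_IF(!std::is_integral<T>::value)>
void describe(T) { std::cout << "non-integral index type\n"; }
#endif

int main() {
#ifdef EIGEN_HAS_SFINAE
  describe(3);     // prints "integral index type"
  describe(2.5f);  // prints "non-integral index type"
#endif
  return 0;
}
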
							
								
								
									
323 external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h vendored Normal file
| @@ -0,0 +1,323 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_MAP_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_MAP_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| // FIXME use proper doxygen documentation (e.g. \tparam MakePointer_) | ||||
|  | ||||
| /** \class TensorMap | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief A tensor expression mapping an existing array of data. | ||||
|   * | ||||
|   */ | ||||
| /// `template <class> class MakePointer_` is added to convert the host pointer to the device pointer. | ||||
| /// It is needed because `T*` is not allowed by our device compiler. | ||||
| /// To keep using the same Evaluator functions, that type has to be converted to our pointer type, | ||||
| /// which is done through the `MakePointer_` class. By default the Type in `MakePointer_<T>` is `T*`, | ||||
| /// so the default template argument does not break any existing code. | ||||
| template<typename PlainObjectType, int Options_, template <class> class MakePointer_> class TensorMap : public TensorBase<TensorMap<PlainObjectType, Options_, MakePointer_> > | ||||
| { | ||||
|   public: | ||||
|     typedef TensorMap<PlainObjectType, Options_, MakePointer_> Self; | ||||
|     typedef typename PlainObjectType::Base Base; | ||||
|     typedef typename Eigen::internal::nested<Self>::type Nested; | ||||
|     typedef typename internal::traits<PlainObjectType>::StorageKind StorageKind; | ||||
|     typedef typename internal::traits<PlainObjectType>::Index Index; | ||||
|     typedef typename internal::traits<PlainObjectType>::Scalar Scalar; | ||||
|     typedef typename NumTraits<Scalar>::Real RealScalar; | ||||
|     typedef typename Base::CoeffReturnType CoeffReturnType; | ||||
|  | ||||
|   /*    typedef typename internal::conditional< | ||||
|                          bool(internal::is_lvalue<PlainObjectType>::value), | ||||
|                          Scalar *, | ||||
|                          const Scalar *>::type | ||||
|                      PointerType;*/ | ||||
|     typedef typename MakePointer_<Scalar>::Type PointerType; | ||||
|     typedef PointerType PointerArgType; | ||||
|  | ||||
|     static const int Options = Options_; | ||||
|  | ||||
|     static const Index NumIndices = PlainObjectType::NumIndices; | ||||
|     typedef typename PlainObjectType::Dimensions Dimensions; | ||||
|  | ||||
|     enum { | ||||
|       IsAligned = ((int(Options_)&Aligned)==Aligned), | ||||
|       Layout = PlainObjectType::Layout, | ||||
|       CoordAccess = true, | ||||
|       RawAccess = true | ||||
|     }; | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr) : m_data(dataPtr), m_dimensions() { | ||||
|       // The number of dimensions used to construct a tensor must be equal to the rank of the tensor. | ||||
|       EIGEN_STATIC_ASSERT((0 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     } | ||||
|  | ||||
| #if EIGEN_HAS_VARIADIC_TEMPLATES | ||||
|     template<typename... IndexTypes> EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index firstDimension, IndexTypes... otherDimensions) : m_data(dataPtr), m_dimensions(firstDimension, otherDimensions...) { | ||||
|       // The number of dimensions used to construct a tensor must be equal to the rank of the tensor. | ||||
|       EIGEN_STATIC_ASSERT((sizeof...(otherDimensions) + 1 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     } | ||||
| #else | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index firstDimension) : m_data(dataPtr), m_dimensions(firstDimension) { | ||||
|       // The number of dimensions used to construct a tensor must be equal to the rank of the tensor. | ||||
|       EIGEN_STATIC_ASSERT((1 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index dim1, Index dim2) : m_data(dataPtr), m_dimensions(dim1, dim2) { | ||||
|       EIGEN_STATIC_ASSERT(2 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index dim1, Index dim2, Index dim3) : m_data(dataPtr), m_dimensions(dim1, dim2, dim3) { | ||||
|       EIGEN_STATIC_ASSERT(3 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index dim1, Index dim2, Index dim3, Index dim4) : m_data(dataPtr), m_dimensions(dim1, dim2, dim3, dim4) { | ||||
|       EIGEN_STATIC_ASSERT(4 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index dim1, Index dim2, Index dim3, Index dim4, Index dim5) : m_data(dataPtr), m_dimensions(dim1, dim2, dim3, dim4, dim5) { | ||||
|       EIGEN_STATIC_ASSERT(5 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     } | ||||
| #endif | ||||
|  | ||||
|    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, const array<Index, NumIndices>& dimensions) | ||||
|       : m_data(dataPtr), m_dimensions(dimensions) | ||||
|     { } | ||||
|  | ||||
|     template <typename Dimensions> | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, const Dimensions& dimensions) | ||||
|       : m_data(dataPtr), m_dimensions(dimensions) | ||||
|     { } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(PlainObjectType& tensor) | ||||
|       : m_data(tensor.data()), m_dimensions(tensor.dimensions()) | ||||
|     { } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Index rank() const { return m_dimensions.rank(); } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Index dimension(Index n) const { return m_dimensions[n]; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Index size() const { return m_dimensions.TotalSize(); } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE PointerType data() { return m_data; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const PointerType data() const { return m_data; } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar& operator()(const array<Index, NumIndices>& indices) const | ||||
|     { | ||||
|       //      eigen_assert(checkIndexRange(indices)); | ||||
|       if (PlainObjectType::Options&RowMajor) { | ||||
|         const Index index = m_dimensions.IndexOfRowMajor(indices); | ||||
|         return m_data[index]; | ||||
|       } else { | ||||
|         const Index index = m_dimensions.IndexOfColMajor(indices); | ||||
|         return m_data[index]; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar& operator()() const | ||||
|     { | ||||
|       EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|       return m_data[0]; | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar& operator()(Index index) const | ||||
|     { | ||||
|       eigen_internal_assert(index >= 0 && index < size()); | ||||
|       return m_data[index]; | ||||
|     } | ||||
|  | ||||
| #if EIGEN_HAS_VARIADIC_TEMPLATES | ||||
|     template<typename... IndexTypes> EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const | ||||
|     { | ||||
|       EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|       if (PlainObjectType::Options&RowMajor) { | ||||
|         const Index index = m_dimensions.IndexOfRowMajor(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}}); | ||||
|         return m_data[index]; | ||||
|       } else { | ||||
|         const Index index = m_dimensions.IndexOfColMajor(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}}); | ||||
|         return m_data[index]; | ||||
|       } | ||||
|     } | ||||
| #else | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const | ||||
|     { | ||||
|       if (PlainObjectType::Options&RowMajor) { | ||||
|         const Index index = i1 + i0 * m_dimensions[1]; | ||||
|         return m_data[index]; | ||||
|       } else { | ||||
|         const Index index = i0 + i1 * m_dimensions[0]; | ||||
|         return m_data[index]; | ||||
|       } | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const | ||||
|     { | ||||
|       if (PlainObjectType::Options&RowMajor) { | ||||
|          const Index index = i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0); | ||||
|          return m_data[index]; | ||||
|       } else { | ||||
|          const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * i2); | ||||
|         return m_data[index]; | ||||
|       } | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3) const | ||||
|     { | ||||
|       if (PlainObjectType::Options&RowMajor) { | ||||
|         const Index index = i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0)); | ||||
|         return m_data[index]; | ||||
|       } else { | ||||
|         const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * i3)); | ||||
|         return m_data[index]; | ||||
|       } | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const | ||||
|     { | ||||
|       if (PlainObjectType::Options&RowMajor) { | ||||
|         const Index index = i4 + m_dimensions[4] * (i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0))); | ||||
|         return m_data[index]; | ||||
|       } else { | ||||
|         const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * (i3 + m_dimensions[3] * i4))); | ||||
|         return m_data[index]; | ||||
|       } | ||||
|     } | ||||
| #endif | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Scalar& operator()(const array<Index, NumIndices>& indices) | ||||
|     { | ||||
|       //      eigen_assert(checkIndexRange(indices)); | ||||
|       if (PlainObjectType::Options&RowMajor) { | ||||
|         const Index index = m_dimensions.IndexOfRowMajor(indices); | ||||
|         return m_data[index]; | ||||
|       } else { | ||||
|         const Index index = m_dimensions.IndexOfColMajor(indices); | ||||
|         return m_data[index]; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Scalar& operator()() | ||||
|     { | ||||
|       EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|       return m_data[0]; | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Scalar& operator()(Index index) | ||||
|     { | ||||
|       eigen_internal_assert(index >= 0 && index < size()); | ||||
|       return m_data[index]; | ||||
|     } | ||||
|  | ||||
| #if EIGEN_HAS_VARIADIC_TEMPLATES | ||||
|     template<typename... IndexTypes> EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) | ||||
|     { | ||||
|       static_assert(sizeof...(otherIndices) + 2 == NumIndices || NumIndices == Dynamic, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor."); | ||||
|       const std::size_t NumDims = sizeof...(otherIndices) + 2; | ||||
|       if (PlainObjectType::Options&RowMajor) { | ||||
|         const Index index = m_dimensions.IndexOfRowMajor(array<Index, NumDims>{{firstIndex, secondIndex, otherIndices...}}); | ||||
|         return m_data[index]; | ||||
|       } else { | ||||
|         const Index index = m_dimensions.IndexOfColMajor(array<Index, NumDims>{{firstIndex, secondIndex, otherIndices...}}); | ||||
|         return m_data[index]; | ||||
|       } | ||||
|     } | ||||
| #else | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1) | ||||
|     { | ||||
|        if (PlainObjectType::Options&RowMajor) { | ||||
|          const Index index = i1 + i0 * m_dimensions[1]; | ||||
|         return m_data[index]; | ||||
|       } else { | ||||
|         const Index index = i0 + i1 * m_dimensions[0]; | ||||
|         return m_data[index]; | ||||
|       } | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2) | ||||
|     { | ||||
|        if (PlainObjectType::Options&RowMajor) { | ||||
|          const Index index = i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0); | ||||
|         return m_data[index]; | ||||
|       } else { | ||||
|          const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * i2); | ||||
|         return m_data[index]; | ||||
|       } | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3) | ||||
|     { | ||||
|       if (PlainObjectType::Options&RowMajor) { | ||||
|         const Index index = i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0)); | ||||
|         return m_data[index]; | ||||
|       } else { | ||||
|         const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * i3)); | ||||
|         return m_data[index]; | ||||
|       } | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) | ||||
|     { | ||||
|       if (PlainObjectType::Options&RowMajor) { | ||||
|         const Index index = i4 + m_dimensions[4] * (i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0))); | ||||
|         return m_data[index]; | ||||
|       } else { | ||||
|         const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * (i3 + m_dimensions[3] * i4))); | ||||
|         return m_data[index]; | ||||
|       } | ||||
|     } | ||||
| #endif | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Self& operator=(const Self& other) | ||||
|     { | ||||
|       typedef TensorAssignOp<Self, const Self> Assign; | ||||
|       Assign assign(*this, other); | ||||
|       internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); | ||||
|       return *this; | ||||
|     } | ||||
|  | ||||
|     template<typename OtherDerived> | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|     Self& operator=(const OtherDerived& other) | ||||
|     { | ||||
|       typedef TensorAssignOp<Self, const OtherDerived> Assign; | ||||
|       Assign assign(*this, other); | ||||
|       internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); | ||||
|       return *this; | ||||
|     } | ||||
|  | ||||
|   private: | ||||
|     typename MakePointer_<Scalar>::Type m_data; | ||||
|     Dimensions m_dimensions; | ||||
| }; | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_MAP_H | ||||
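A usage sketch for TensorMap (hypothetical buffer name `storage`, same include-path assumption as before): it wraps externally owned memory as a rank-3 tensor without copying, using the three-dimension constructor and the coefficient accessors defined above:

#include <unsupported/Eigen/CXX11/Tensor>
#include <cassert>

int main() {
  // Externally owned buffer of 2 x 3 x 4 = 24 floats.
  float storage[24];
  for (int i = 0; i < 24; ++i) storage[i] = static_cast<float>(i);

  // TensorMap views the buffer as a rank-3 tensor expression; no copy is made.
  Eigen::TensorMap<Eigen::Tensor<float, 3> > view(storage, 2, 3, 4);
  assert(view.size() == 24);
  assert(view.dimension(1) == 3);

  // Column-major by default: view(i0, i1, i2) reads storage[i0 + 2*(i1 + 3*i2)].
  assert(view(1, 2, 3) == storage[1 + 2 * (2 + 3 * 3)]);

  // Writing through the map writes into the original buffer.
  view(0, 0, 0) = -1.0f;
  assert(storage[0] == -1.0f);
  return 0;
}
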
							
								
								
									
218 external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h vendored Normal file
| @@ -0,0 +1,218 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_META_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_META_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| template<bool cond> struct Cond {}; | ||||
|  | ||||
| template<typename T1, typename T2> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE | ||||
| const T1& choose(Cond<true>, const T1& first, const T2&) { | ||||
|   return first; | ||||
| } | ||||
|  | ||||
| template<typename T1, typename T2> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE | ||||
| const T2& choose(Cond<false>, const T1&, const T2& second) { | ||||
|   return second; | ||||
| } | ||||
|  | ||||
|  | ||||
| template <typename T, typename X, typename Y> | ||||
| EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE | ||||
| T divup(const X x, const Y y) { | ||||
|   return static_cast<T>((x + y - 1) / y); | ||||
| } | ||||
|  | ||||
| template <typename T> | ||||
| EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE | ||||
| T divup(const T x, const T y) { | ||||
|   return static_cast<T>((x + y - 1) / y); | ||||
| } | ||||
|  | ||||
| template <size_t n> struct max_n_1 { | ||||
|   static const size_t size = n; | ||||
| }; | ||||
| template <> struct max_n_1<0> { | ||||
|   static const size_t size = 1; | ||||
| }; | ||||
|  | ||||
|  | ||||
| // Default packet types | ||||
| template <typename Scalar, typename Device> | ||||
| struct PacketType : internal::packet_traits<Scalar> { | ||||
|   typedef typename internal::packet_traits<Scalar>::type type; | ||||
| }; | ||||
|  | ||||
| // For CUDA packet types when using a GpuDevice | ||||
| #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) && defined(EIGEN_HAS_CUDA_FP16) | ||||
| template <> | ||||
| struct PacketType<half, GpuDevice> { | ||||
|   typedef half2 type; | ||||
|   static const int size = 2; | ||||
|   enum { | ||||
|     HasAdd    = 1, | ||||
|     HasSub    = 1, | ||||
|     HasMul    = 1, | ||||
|     HasNegate = 1, | ||||
|     HasAbs    = 1, | ||||
|     HasArg    = 0, | ||||
|     HasAbs2   = 0, | ||||
|     HasMin    = 1, | ||||
|     HasMax    = 1, | ||||
|     HasConj   = 0, | ||||
|     HasSetLinear = 0, | ||||
|     HasBlend  = 0, | ||||
|  | ||||
|     HasDiv    = 1, | ||||
|     HasSqrt   = 1, | ||||
|     HasRsqrt  = 1, | ||||
|     HasExp    = 1, | ||||
|     HasLog    = 1, | ||||
|     HasLog1p  = 0, | ||||
|     HasLog10  = 0, | ||||
|     HasPow    = 1, | ||||
|   }; | ||||
| }; | ||||
| #endif | ||||
|  | ||||
| #if defined(EIGEN_USE_SYCL) | ||||
| template <typename T> | ||||
|   struct PacketType<T, SyclDevice> { | ||||
|   typedef T type; | ||||
|   static const int size = 1; | ||||
|   enum { | ||||
|     HasAdd    = 0, | ||||
|     HasSub    = 0, | ||||
|     HasMul    = 0, | ||||
|     HasNegate = 0, | ||||
|     HasAbs    = 0, | ||||
|     HasArg    = 0, | ||||
|     HasAbs2   = 0, | ||||
|     HasMin    = 0, | ||||
|     HasMax    = 0, | ||||
|     HasConj   = 0, | ||||
|     HasSetLinear = 0, | ||||
|     HasBlend  = 0 | ||||
|   }; | ||||
| }; | ||||
| #endif | ||||
|  | ||||
|  | ||||
| // Tuple mimics std::pair but works on e.g. nvcc. | ||||
| template <typename U, typename V> struct Tuple { | ||||
|  public: | ||||
|   U first; | ||||
|   V second; | ||||
|  | ||||
|   typedef U first_type; | ||||
|   typedef V second_type; | ||||
|  | ||||
|   EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   Tuple() : first(), second() {} | ||||
|  | ||||
|   EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   Tuple(const U& f, const V& s) : first(f), second(s) {} | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   Tuple& operator= (const Tuple& rhs) { | ||||
|     if (&rhs == this) return *this; | ||||
|     first = rhs.first; | ||||
|     second = rhs.second; | ||||
|     return *this; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   void swap(Tuple& rhs) { | ||||
|     using numext::swap; | ||||
|     swap(first, rhs.first); | ||||
|     swap(second, rhs.second); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename U, typename V> | ||||
| EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
| bool operator==(const Tuple<U, V>& x, const Tuple<U, V>& y) { | ||||
|   return (x.first == y.first && x.second == y.second); | ||||
| } | ||||
|  | ||||
| template <typename U, typename V> | ||||
| EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
| bool operator!=(const Tuple<U, V>& x, const Tuple<U, V>& y) { | ||||
|   return !(x == y); | ||||
| } | ||||
|  | ||||
|  | ||||
| // Can't use std::pair on CUDA devices | ||||
| template <typename Idx> struct IndexPair { | ||||
|   EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE IndexPair() : first(0), second(0) {} | ||||
|   EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE IndexPair(Idx f, Idx s) : first(f), second(s) {} | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC void set(IndexPair<Idx> val) { | ||||
|     first = val.first; | ||||
|     second = val.second; | ||||
|   } | ||||
|  | ||||
|   Idx first; | ||||
|   Idx second; | ||||
| }; | ||||
|  | ||||
|  | ||||
| #ifdef EIGEN_HAS_SFINAE | ||||
| namespace internal { | ||||
|  | ||||
|   template<typename IndexType, Index... Is> | ||||
|   EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   array<Index, sizeof...(Is)> customIndices2Array(IndexType& idx, numeric_list<Index, Is...>) { | ||||
|     return { idx[Is]... }; | ||||
|   } | ||||
|   template<typename IndexType> | ||||
|   EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   array<Index, 0> customIndices2Array(IndexType&, numeric_list<Index>) { | ||||
|     return array<Index, 0>(); | ||||
|   } | ||||
|  | ||||
|   /** Make an array (for index/dimensions) out of a custom index */ | ||||
|   template<typename Index, std::size_t NumIndices, typename IndexType> | ||||
|   EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   array<Index, NumIndices> customIndices2Array(IndexType& idx) { | ||||
|     return customIndices2Array(idx, typename gen_numeric_list<Index, NumIndices>::type{}); | ||||
|   } | ||||
|  | ||||
|  | ||||
|   template <typename B, typename D> | ||||
|   struct is_base_of | ||||
|   { | ||||
|  | ||||
|     typedef char (&yes)[1]; | ||||
|     typedef char (&no)[2]; | ||||
|  | ||||
|     template <typename BB, typename DD> | ||||
|     struct Host | ||||
|     { | ||||
|       operator BB*() const; | ||||
|       operator DD*(); | ||||
|     }; | ||||
|  | ||||
|     template<typename T> | ||||
|     static yes check(D*, T); | ||||
|     static no check(B*, int); | ||||
|  | ||||
|     static const bool value = sizeof(check(Host<B,D>(), int())) == sizeof(yes); | ||||
|   }; | ||||
|  | ||||
| } | ||||
| #endif | ||||
|  | ||||
|  | ||||
|  | ||||
| }  // namespace Eigen | ||||
|  | ||||
| #endif  // EIGEN_CXX11_TENSOR_TENSOR_META_H | ||||
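A small sketch of the device-friendly helpers defined above (same include-path assumption): divup is ceiling division, typically used to split work into fixed-size blocks, and Tuple/IndexPair stand in for std::pair where it cannot be used:

#include <unsupported/Eigen/CXX11/Tensor>
#include <cassert>

int main() {
  // divup(x, y) == ceil(x / y): 1000 items in blocks of 128 need 8 blocks.
  const int num_blocks = Eigen::divup(1000, 128);
  assert(num_blocks == 8);
  assert(num_blocks * 128 >= 1000);

  // Tuple and IndexPair mimic std::pair but also work in device code.
  Eigen::Tuple<int, float> t(3, 1.5f);
  assert(t.first == 3 && t.second == 1.5f);

  Eigen::IndexPair<int> pair(1, 0);
  assert(pair.first == 1 && pair.second == 0);
  return 0;
}
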
							
								
								
									
888 external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h vendored Normal file
| @@ -0,0 +1,888 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \class TensorReshaping | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Tensor reshaping class. | ||||
|   * | ||||
|   * | ||||
|   */ | ||||
| namespace internal { | ||||
| template<typename NewDimensions, typename XprType> | ||||
| struct traits<TensorReshapingOp<NewDimensions, XprType> > : public traits<XprType> | ||||
| { | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef traits<XprType> XprTraits; | ||||
|   typedef typename XprTraits::StorageKind StorageKind; | ||||
|   typedef typename XprTraits::Index Index; | ||||
|   typedef typename XprType::Nested Nested; | ||||
|   typedef typename remove_reference<Nested>::type _Nested; | ||||
|   static const int NumDimensions = array_size<NewDimensions>::value; | ||||
|   static const int Layout = XprTraits::Layout; | ||||
| }; | ||||
|  | ||||
| template<typename NewDimensions, typename XprType> | ||||
| struct eval<TensorReshapingOp<NewDimensions, XprType>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorReshapingOp<NewDimensions, XprType>& type; | ||||
| }; | ||||
|  | ||||
| template<typename NewDimensions, typename XprType> | ||||
| struct nested<TensorReshapingOp<NewDimensions, XprType>, 1, typename eval<TensorReshapingOp<NewDimensions, XprType> >::type> | ||||
| { | ||||
|   typedef TensorReshapingOp<NewDimensions, XprType> type; | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
|  | ||||
|  | ||||
| template<typename NewDimensions, typename XprType> | ||||
| class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors> | ||||
| { | ||||
|   public: | ||||
|   typedef typename Eigen::internal::traits<TensorReshapingOp>::Scalar Scalar; | ||||
|   typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; | ||||
|   typedef typename Eigen::internal::nested<TensorReshapingOp>::type Nested; | ||||
|   typedef typename Eigen::internal::traits<TensorReshapingOp>::StorageKind StorageKind; | ||||
|   typedef typename Eigen::internal::traits<TensorReshapingOp>::Index Index; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReshapingOp(const XprType& expr, const NewDimensions& dims) | ||||
|       : m_xpr(expr), m_dims(dims) {} | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const NewDimensions& dimensions() const { return m_dims; } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const typename internal::remove_all<typename XprType::Nested>::type& | ||||
|     expression() const { return m_xpr; } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE TensorReshapingOp& operator = (const TensorReshapingOp& other) | ||||
|     { | ||||
|       typedef TensorAssignOp<TensorReshapingOp, const TensorReshapingOp> Assign; | ||||
|       Assign assign(*this, other); | ||||
|       internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); | ||||
|       return *this; | ||||
|     } | ||||
|  | ||||
|     template<typename OtherDerived> | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE TensorReshapingOp& operator = (const OtherDerived& other) | ||||
|     { | ||||
|       typedef TensorAssignOp<TensorReshapingOp, const OtherDerived> Assign; | ||||
|       Assign assign(*this, other); | ||||
|       internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); | ||||
|       return *this; | ||||
|     } | ||||
|  | ||||
|   protected: | ||||
|     typename XprType::Nested m_xpr; | ||||
|     const NewDimensions m_dims; | ||||
| }; | ||||
|  | ||||
|  | ||||
| // Eval as rvalue | ||||
| template<typename NewDimensions, typename ArgType, typename Device> | ||||
| struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> | ||||
| { | ||||
|   typedef TensorReshapingOp<NewDimensions, ArgType> XprType; | ||||
|   typedef NewDimensions Dimensions; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, | ||||
|     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, | ||||
|     Layout = TensorEvaluator<ArgType, Device>::Layout, | ||||
|     CoordAccess = false,  // to be implemented | ||||
|     RawAccess = TensorEvaluator<ArgType, Device>::RawAccess | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) | ||||
|       : m_impl(op.expression(), device), m_dimensions(op.dimensions()) | ||||
|   { | ||||
|     // The total size of the reshaped tensor must be equal to the total size | ||||
|     // of the input tensor. | ||||
|     eigen_assert(internal::array_prod(m_impl.dimensions()) == internal::array_prod(op.dimensions())); | ||||
|   } | ||||
|  | ||||
|   typedef typename XprType::Index Index; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { | ||||
|     return m_impl.evalSubExprsIfNeeded(data); | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { | ||||
|     m_impl.cleanup(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const | ||||
|   { | ||||
|     return m_impl.coeff(index); | ||||
|   } | ||||
|  | ||||
|   template<int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const | ||||
|   { | ||||
|     return m_impl.template packet<LoadMode>(index); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { | ||||
|     return m_impl.costPerCoeff(vectorized); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC Scalar* data() const { return const_cast<Scalar*>(m_impl.data()); } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; } | ||||
|  | ||||
|  protected: | ||||
|   TensorEvaluator<ArgType, Device> m_impl; | ||||
|   NewDimensions m_dimensions; | ||||
| }; | ||||
|  | ||||
|  | ||||
| // Eval as lvalue | ||||
| template<typename NewDimensions, typename ArgType, typename Device> | ||||
|   struct TensorEvaluator<TensorReshapingOp<NewDimensions, ArgType>, Device> | ||||
|   : public TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> | ||||
|  | ||||
| { | ||||
|   typedef TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> Base; | ||||
|   typedef TensorReshapingOp<NewDimensions, ArgType> XprType; | ||||
|   typedef NewDimensions Dimensions; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, | ||||
|     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, | ||||
|     Layout = TensorEvaluator<ArgType, Device>::Layout, | ||||
|     CoordAccess = false,  // to be implemented | ||||
|     RawAccess = TensorEvaluator<ArgType, Device>::RawAccess | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) | ||||
|     : Base(op, device) | ||||
|   { } | ||||
|  | ||||
|   typedef typename XprType::Index Index; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) | ||||
|   { | ||||
|     return this->m_impl.coeffRef(index); | ||||
|   } | ||||
|   template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   void writePacket(Index index, const PacketReturnType& x) | ||||
|   { | ||||
|     this->m_impl.template writePacket<StoreMode>(index, x); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| /** \class TensorSlicing | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Tensor slicing class. | ||||
|   * | ||||
|   * | ||||
|   */ | ||||
| namespace internal { | ||||
| template<typename StartIndices, typename Sizes, typename XprType> | ||||
| struct traits<TensorSlicingOp<StartIndices, Sizes, XprType> > : public traits<XprType> | ||||
| { | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef traits<XprType> XprTraits; | ||||
|   typedef typename XprTraits::StorageKind StorageKind; | ||||
|   typedef typename XprTraits::Index Index; | ||||
|   typedef typename XprType::Nested Nested; | ||||
|   typedef typename remove_reference<Nested>::type _Nested; | ||||
|   static const int NumDimensions = array_size<StartIndices>::value; | ||||
|   static const int Layout = XprTraits::Layout; | ||||
| }; | ||||
|  | ||||
| template<typename StartIndices, typename Sizes, typename XprType> | ||||
| struct eval<TensorSlicingOp<StartIndices, Sizes, XprType>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorSlicingOp<StartIndices, Sizes, XprType>& type; | ||||
| }; | ||||
|  | ||||
| template<typename StartIndices, typename Sizes, typename XprType> | ||||
| struct nested<TensorSlicingOp<StartIndices, Sizes, XprType>, 1, typename eval<TensorSlicingOp<StartIndices, Sizes, XprType> >::type> | ||||
| { | ||||
|   typedef TensorSlicingOp<StartIndices, Sizes, XprType> type; | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
|  | ||||
|  | ||||
| template<typename StartIndices, typename Sizes, typename XprType> | ||||
| class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType> > | ||||
| { | ||||
|   public: | ||||
|   typedef typename Eigen::internal::traits<TensorSlicingOp>::Scalar Scalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename Eigen::internal::nested<TensorSlicingOp>::type Nested; | ||||
|   typedef typename Eigen::internal::traits<TensorSlicingOp>::StorageKind StorageKind; | ||||
|   typedef typename Eigen::internal::traits<TensorSlicingOp>::Index Index; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorSlicingOp(const XprType& expr, const StartIndices& indices, const Sizes& sizes) | ||||
|       : m_xpr(expr), m_indices(indices), m_sizes(sizes) {} | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const StartIndices& startIndices() const { return m_indices; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const Sizes& sizes() const { return m_sizes; } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const typename internal::remove_all<typename XprType::Nested>::type& | ||||
|     expression() const { return m_xpr; } | ||||
|  | ||||
|     template<typename OtherDerived> | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE TensorSlicingOp& operator = (const OtherDerived& other) | ||||
|     { | ||||
|       typedef TensorAssignOp<TensorSlicingOp, const OtherDerived> Assign; | ||||
|       Assign assign(*this, other); | ||||
|       internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); | ||||
|       return *this; | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE TensorSlicingOp& operator = (const TensorSlicingOp& other) | ||||
|     { | ||||
|       typedef TensorAssignOp<TensorSlicingOp, const TensorSlicingOp> Assign; | ||||
|       Assign assign(*this, other); | ||||
|       internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); | ||||
|       return *this; | ||||
|     } | ||||
|  | ||||
|  | ||||
|   protected: | ||||
|     typename XprType::Nested m_xpr; | ||||
|     const StartIndices m_indices; | ||||
|     const Sizes m_sizes; | ||||
| }; | ||||
|  | ||||
|  | ||||
| // Fixme: figure out the exact threshold | ||||
| namespace { | ||||
| template <typename Index, typename Device> struct MemcpyTriggerForSlicing { | ||||
|   EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const Device& device) : threshold_(2 * device.numThreads()) { } | ||||
|   EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > threshold_; } | ||||
|  | ||||
|  private: | ||||
|   Index threshold_; | ||||
| }; | ||||
|  | ||||
| // It is very expensive to start the memcpy kernel on GPU: we therefore only | ||||
| // use it for large copies. | ||||
| #ifdef EIGEN_USE_GPU | ||||
| template <typename Index> struct MemcpyTriggerForSlicing<Index, GpuDevice>  { | ||||
|   EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const GpuDevice&) { } | ||||
|   EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > 4*1024*1024; } | ||||
| }; | ||||
| #endif | ||||
| } | ||||
|  | ||||
| // Eval as rvalue | ||||
| template<typename StartIndices, typename Sizes, typename ArgType, typename Device> | ||||
| struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> | ||||
| { | ||||
|   typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType; | ||||
|   static const int NumDims = internal::array_size<Sizes>::value; | ||||
|  | ||||
|   enum { | ||||
|     // Alignment can't be guaranteed at compile time since it depends on the | ||||
|     // slice offsets and sizes. | ||||
|     IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false, | ||||
|     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, | ||||
|     Layout = TensorEvaluator<ArgType, Device>::Layout, | ||||
|     CoordAccess = false, | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) | ||||
|       : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices()) | ||||
|   { | ||||
|     for (std::size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) { | ||||
|       eigen_assert(m_impl.dimensions()[i] >= op.sizes()[i] + op.startIndices()[i]); | ||||
|     } | ||||
|  | ||||
|     const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); | ||||
|     const Sizes& output_dims = op.sizes(); | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       m_inputStrides[0] = 1; | ||||
|       for (int i = 1; i < NumDims; ++i) { | ||||
|         m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; | ||||
|       } | ||||
|  | ||||
|       // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed. | ||||
|       m_outputStrides[0] = 1; | ||||
|       for (int i = 1; i < NumDims; ++i) { | ||||
|         m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1]; | ||||
|         m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]); | ||||
|       } | ||||
|     } else { | ||||
|       m_inputStrides[NumDims-1] = 1; | ||||
|       for (int i = NumDims - 2; i >= 0; --i) { | ||||
|         m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; | ||||
|       } | ||||
|  | ||||
|       // Don't initialize m_fastOutputStrides[NumDims-1] since it won't ever be accessed. | ||||
|       m_outputStrides[NumDims-1] = 1; | ||||
|       for (int i = NumDims - 2; i >= 0; --i) { | ||||
|         m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1]; | ||||
|         m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   typedef typename XprType::Index Index; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   typedef Sizes Dimensions; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } | ||||
|  | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { | ||||
|     m_impl.evalSubExprsIfNeeded(NULL); | ||||
|     if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data && m_impl.data()) { | ||||
|       Index contiguous_values = 1; | ||||
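|       // Count how many consecutive output coefficients are contiguous in the input: fully | ||||
|       // preserved innermost dimensions contribute their whole extent, and the first sliced | ||||
|       // dimension contributes its output extent before the run ends. | ||||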
|       if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|         for (int i = 0; i < NumDims; ++i) { | ||||
|           contiguous_values *= dimensions()[i]; | ||||
|           if (dimensions()[i] != m_impl.dimensions()[i]) { | ||||
|             break; | ||||
|           } | ||||
|         } | ||||
|       } else { | ||||
|         for (int i = NumDims-1; i >= 0; --i) { | ||||
|           contiguous_values *= dimensions()[i]; | ||||
|           if (dimensions()[i] != m_impl.dimensions()[i]) { | ||||
|             break; | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|       // Use memcpy if it's going to be faster than using the regular evaluation. | ||||
|       const MemcpyTriggerForSlicing<Index, Device> trigger(m_device); | ||||
|       if (trigger(contiguous_values)) { | ||||
|         Scalar* src = (Scalar*)m_impl.data(); | ||||
|         for (Index i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) { | ||||
|           Index offset = srcCoeff(i); | ||||
|           m_device.memcpy((void*)(data+i), src+offset, contiguous_values * sizeof(Scalar)); | ||||
|         } | ||||
|         return false; | ||||
|       } | ||||
|     } | ||||
|     return true; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { | ||||
|     m_impl.cleanup(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const | ||||
|   { | ||||
|     return m_impl.coeff(srcCoeff(index)); | ||||
|   } | ||||
|  | ||||
|   template<int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const | ||||
|   { | ||||
|     const int packetSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|     EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     eigen_assert(index+packetSize-1 < internal::array_prod(dimensions())); | ||||
|  | ||||
|     Index inputIndices[] = {0, 0}; | ||||
|     Index indices[] = {index, index + packetSize - 1}; | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       for (int i = NumDims - 1; i > 0; --i) { | ||||
|         const Index idx0 = indices[0] / m_fastOutputStrides[i]; | ||||
|         const Index idx1 = indices[1] / m_fastOutputStrides[i]; | ||||
|         inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i]; | ||||
|         inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i]; | ||||
|         indices[0] -= idx0 * m_outputStrides[i]; | ||||
|         indices[1] -= idx1 * m_outputStrides[i]; | ||||
|       } | ||||
|       inputIndices[0] += (indices[0] + m_offsets[0]); | ||||
|       inputIndices[1] += (indices[1] + m_offsets[0]); | ||||
|     } else { | ||||
|       for (int i = 0; i < NumDims - 1; ++i) { | ||||
|         const Index idx0 = indices[0] / m_fastOutputStrides[i]; | ||||
|         const Index idx1 = indices[1] / m_fastOutputStrides[i]; | ||||
|         inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i]; | ||||
|         inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i]; | ||||
|         indices[0] -= idx0 * m_outputStrides[i]; | ||||
|         indices[1] -= idx1 * m_outputStrides[i]; | ||||
|       } | ||||
|       inputIndices[0] += (indices[0] + m_offsets[NumDims-1]); | ||||
|       inputIndices[1] += (indices[1] + m_offsets[NumDims-1]); | ||||
|     } | ||||
|     if (inputIndices[1] - inputIndices[0] == packetSize - 1) { | ||||
|       PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]); | ||||
|       return rslt; | ||||
|     } | ||||
|     else { | ||||
|       EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize]; | ||||
|       values[0] = m_impl.coeff(inputIndices[0]); | ||||
|       values[packetSize-1] = m_impl.coeff(inputIndices[1]); | ||||
|       for (int i = 1; i < packetSize-1; ++i) { | ||||
|         values[i] = coeff(index+i); | ||||
|       } | ||||
|       PacketReturnType rslt = internal::pload<PacketReturnType>(values); | ||||
|       return rslt; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { | ||||
|     return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, NumDims); | ||||
|   } | ||||
|  | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const { | ||||
|     Scalar* result = m_impl.data(); | ||||
|     if (result) { | ||||
|       Index offset = 0; | ||||
|       if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|         for (int i = 0; i < NumDims; ++i) { | ||||
|           if (m_dimensions[i] != m_impl.dimensions()[i]) { | ||||
|             offset += m_offsets[i] * m_inputStrides[i]; | ||||
|             for (int j = i+1; j < NumDims; ++j) { | ||||
|               if (m_dimensions[j] > 1) { | ||||
|                 return NULL; | ||||
|               } | ||||
|               offset += m_offsets[j] * m_inputStrides[j]; | ||||
|             } | ||||
|             break; | ||||
|           } | ||||
|         } | ||||
|       } else { | ||||
|         for (int i = NumDims - 1; i >= 0; --i) { | ||||
|           if (m_dimensions[i] != m_impl.dimensions()[i]) { | ||||
|             offset += m_offsets[i] * m_inputStrides[i]; | ||||
|             for (int j = i-1; j >= 0; --j) { | ||||
|               if (m_dimensions[j] > 1) { | ||||
|                 return NULL; | ||||
|               } | ||||
|               offset += m_offsets[j] * m_inputStrides[j]; | ||||
|             } | ||||
|             break; | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|       return result + offset; | ||||
|     } | ||||
|     return NULL; | ||||
|   } | ||||
|  | ||||
|  protected: | ||||
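|   // Maps a linear index of the sliced (output) tensor to the linear index of the matching | ||||
|   // coefficient in the input tensor: peel off one dimension at a time with the output | ||||
|   // strides and apply the slice offsets through the input strides. | ||||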
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const | ||||
|   { | ||||
|     Index inputIndex = 0; | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       for (int i = NumDims - 1; i > 0; --i) { | ||||
|         const Index idx = index / m_fastOutputStrides[i]; | ||||
|         inputIndex += (idx + m_offsets[i]) * m_inputStrides[i]; | ||||
|         index -= idx * m_outputStrides[i]; | ||||
|       } | ||||
|       inputIndex += (index + m_offsets[0]); | ||||
|     } else { | ||||
|       for (int i = 0; i < NumDims - 1; ++i) { | ||||
|         const Index idx = index / m_fastOutputStrides[i]; | ||||
|         inputIndex += (idx + m_offsets[i]) * m_inputStrides[i]; | ||||
|         index -= idx * m_outputStrides[i]; | ||||
|       } | ||||
|       inputIndex += (index + m_offsets[NumDims-1]); | ||||
|     } | ||||
|     return inputIndex; | ||||
|   } | ||||
|  | ||||
|   array<Index, NumDims> m_outputStrides; | ||||
|   array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides; | ||||
|   array<Index, NumDims> m_inputStrides; | ||||
|   TensorEvaluator<ArgType, Device> m_impl; | ||||
|   const Device& m_device; | ||||
|   Dimensions m_dimensions; | ||||
|   const StartIndices m_offsets; | ||||
| }; | ||||
|  | ||||
|  | ||||
| // Eval as lvalue | ||||
| template<typename StartIndices, typename Sizes, typename ArgType, typename Device> | ||||
| struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> | ||||
|   : public TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> | ||||
| { | ||||
|   typedef TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> Base; | ||||
|   typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType; | ||||
|   static const int NumDims = internal::array_size<Sizes>::value; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false, | ||||
|     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, | ||||
|     Layout = TensorEvaluator<ArgType, Device>::Layout, | ||||
|     CoordAccess = false, | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) | ||||
|     : Base(op, device) | ||||
|     { } | ||||
|  | ||||
|   typedef typename XprType::Index Index; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   typedef Sizes Dimensions; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) | ||||
|   { | ||||
|     return this->m_impl.coeffRef(this->srcCoeff(index)); | ||||
|   } | ||||
|  | ||||
|   template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   void writePacket(Index index, const PacketReturnType& x) | ||||
|   { | ||||
|     const int packetSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|     Index inputIndices[] = {0, 0}; | ||||
|     Index indices[] = {index, index + packetSize - 1}; | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       for (int i = NumDims - 1; i > 0; --i) { | ||||
|         const Index idx0 = indices[0] / this->m_fastOutputStrides[i]; | ||||
|         const Index idx1 = indices[1] / this->m_fastOutputStrides[i]; | ||||
|         inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i]; | ||||
|         inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i]; | ||||
|         indices[0] -= idx0 * this->m_outputStrides[i]; | ||||
|         indices[1] -= idx1 * this->m_outputStrides[i]; | ||||
|       } | ||||
|       inputIndices[0] += (indices[0] + this->m_offsets[0]); | ||||
|       inputIndices[1] += (indices[1] + this->m_offsets[0]); | ||||
|     } else { | ||||
|       for (int i = 0; i < NumDims - 1; ++i) { | ||||
|         const Index idx0 = indices[0] / this->m_fastOutputStrides[i]; | ||||
|         const Index idx1 = indices[1] / this->m_fastOutputStrides[i]; | ||||
|         inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i]; | ||||
|         inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i]; | ||||
|         indices[0] -= idx0 * this->m_outputStrides[i]; | ||||
|         indices[1] -= idx1 * this->m_outputStrides[i]; | ||||
|       } | ||||
|       inputIndices[0] += (indices[0] + this->m_offsets[NumDims-1]); | ||||
|       inputIndices[1] += (indices[1] + this->m_offsets[NumDims-1]); | ||||
|     } | ||||
|     if (inputIndices[1] - inputIndices[0] == packetSize - 1) { | ||||
|       this->m_impl.template writePacket<StoreMode>(inputIndices[0], x); | ||||
|     } | ||||
|     else { | ||||
|       EIGEN_ALIGN_MAX CoeffReturnType values[packetSize]; | ||||
|       internal::pstore<CoeffReturnType, PacketReturnType>(values, x); | ||||
|       this->m_impl.coeffRef(inputIndices[0]) = values[0]; | ||||
|       this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1]; | ||||
|       for (int i = 1; i < packetSize-1; ++i) { | ||||
|         this->coeffRef(index+i) = values[i]; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
|  | ||||
| namespace internal { | ||||
| template<typename StartIndices, typename StopIndices, typename Strides, typename XprType> | ||||
| struct traits<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> > : public traits<XprType> | ||||
| { | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef traits<XprType> XprTraits; | ||||
|   typedef typename XprTraits::StorageKind StorageKind; | ||||
|   typedef typename XprTraits::Index Index; | ||||
|   typedef typename XprType::Nested Nested; | ||||
|   typedef typename remove_reference<Nested>::type _Nested; | ||||
|   static const int NumDimensions = array_size<StartIndices>::value; | ||||
|   static const int Layout = XprTraits::Layout; | ||||
| }; | ||||
|  | ||||
| template<typename StartIndices, typename StopIndices, typename Strides, typename XprType> | ||||
| struct eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>& type; | ||||
| }; | ||||
|  | ||||
| template<typename StartIndices, typename StopIndices, typename Strides, typename XprType> | ||||
| struct nested<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, 1, typename eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >::type> | ||||
| { | ||||
|   typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> type; | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
|  | ||||
| template<typename StartIndices, typename StopIndices, typename Strides, typename XprType> | ||||
| class TensorStridingSlicingOp : public TensorBase<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> > | ||||
| { | ||||
|   public: | ||||
|   typedef typename internal::traits<TensorStridingSlicingOp>::Scalar Scalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename internal::nested<TensorStridingSlicingOp>::type Nested; | ||||
|   typedef typename internal::traits<TensorStridingSlicingOp>::StorageKind StorageKind; | ||||
|   typedef typename internal::traits<TensorStridingSlicingOp>::Index Index; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingSlicingOp( | ||||
|     const XprType& expr, const StartIndices& startIndices, | ||||
|     const StopIndices& stopIndices, const Strides& strides) | ||||
|       : m_xpr(expr), m_startIndices(startIndices), m_stopIndices(stopIndices), | ||||
|         m_strides(strides) {} | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const StartIndices& startIndices() const { return m_startIndices; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const StopIndices& stopIndices() const { return m_stopIndices; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const Strides& strides() const { return m_strides; } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const typename internal::remove_all<typename XprType::Nested>::type& | ||||
|     expression() const { return m_xpr; } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE TensorStridingSlicingOp& operator = (const TensorStridingSlicingOp& other) | ||||
|     { | ||||
|       typedef TensorAssignOp<TensorStridingSlicingOp, const TensorStridingSlicingOp> Assign; | ||||
|       Assign assign(*this, other); | ||||
|       internal::TensorExecutor<const Assign, DefaultDevice>::run( | ||||
|           assign, DefaultDevice()); | ||||
|       return *this; | ||||
|     } | ||||
|  | ||||
|     template<typename OtherDerived> | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE TensorStridingSlicingOp& operator = (const OtherDerived& other) | ||||
|     { | ||||
|       typedef TensorAssignOp<TensorStridingSlicingOp, const OtherDerived> Assign; | ||||
|       Assign assign(*this, other); | ||||
|       internal::TensorExecutor<const Assign, DefaultDevice>::run( | ||||
|           assign, DefaultDevice()); | ||||
|       return *this; | ||||
|     } | ||||
|  | ||||
|   protected: | ||||
|     typename XprType::Nested m_xpr; | ||||
|     const StartIndices m_startIndices; | ||||
|     const StopIndices m_stopIndices; | ||||
|     const Strides m_strides; | ||||
| }; | ||||
|  | ||||
| // Eval as rvalue | ||||
| template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device> | ||||
| struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> | ||||
| { | ||||
|   typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType; | ||||
|   static const int NumDims = internal::array_size<Strides>::value; | ||||
|  | ||||
|   enum { | ||||
|     // Alignment can't be guaranteed at compile time since it depends on the | ||||
|     // slice offsets and sizes. | ||||
|     IsAligned = false, | ||||
|     PacketAccess = false, | ||||
|     BlockAccess = false, | ||||
|     Layout = TensorEvaluator<ArgType, Device>::Layout, | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) | ||||
|       : m_impl(op.expression(), device), m_device(device), m_strides(op.strides()) | ||||
|   { | ||||
|     // Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero | ||||
|     DSizes<Index,NumDims> startIndicesClamped, stopIndicesClamped; | ||||
|     for (size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) { | ||||
|       eigen_assert(m_strides[i] != 0 && "0 stride is invalid"); | ||||
|       if(m_strides[i]>0){ | ||||
|         startIndicesClamped[i] = clamp(op.startIndices()[i], 0, m_impl.dimensions()[i]); | ||||
|         stopIndicesClamped[i] = clamp(op.stopIndices()[i], 0, m_impl.dimensions()[i]); | ||||
|       }else{ | ||||
|         /* implies m_strides[i]<0 by assert */ | ||||
|         startIndicesClamped[i] = clamp(op.startIndices()[i], -1, m_impl.dimensions()[i] - 1); | ||||
|         stopIndicesClamped[i] = clamp(op.stopIndices()[i], -1, m_impl.dimensions()[i] - 1); | ||||
|       } | ||||
|       m_startIndices[i] = startIndicesClamped[i]; | ||||
|     } | ||||
|  | ||||
|     const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); | ||||
|  | ||||
|     // check for degenerate intervals and compute output tensor shape | ||||
|     bool degenerate = false; | ||||
|     for(int i = 0; i < NumDims; i++){ | ||||
|       Index interval = stopIndicesClamped[i] - startIndicesClamped[i]; | ||||
|       if(interval == 0 || ((interval<0) != (m_strides[i]<0))){ | ||||
|         m_dimensions[i] = 0; | ||||
|         degenerate = true; | ||||
|       }else{ | ||||
|         m_dimensions[i] = interval / m_strides[i] | ||||
|                           + (interval % m_strides[i] != 0 ? 1 : 0); | ||||
|         eigen_assert(m_dimensions[i] >= 0); | ||||
|       } | ||||
|     } | ||||
|     Strides output_dims = m_dimensions; | ||||
|  | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       m_inputStrides[0] = m_strides[0]; | ||||
|       m_offsets[0] = startIndicesClamped[0]; | ||||
|       Index previousDimProduct = 1; | ||||
|       for (int i = 1; i < NumDims; ++i) { | ||||
|         previousDimProduct *= input_dims[i-1]; | ||||
|         m_inputStrides[i] = previousDimProduct * m_strides[i]; | ||||
|         m_offsets[i] = startIndicesClamped[i] * previousDimProduct; | ||||
|       } | ||||
|  | ||||
|       // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed. | ||||
|       m_outputStrides[0] = 1; | ||||
|       for (int i = 1; i < NumDims; ++i) { | ||||
|         m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1]; | ||||
|         // NOTE: if the tensor is degenerate we pass 1 to keep the TensorIntDivisor constructor from failing on a zero divisor. | ||||
|         m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]); | ||||
|       } | ||||
|     } else { | ||||
|       m_inputStrides[NumDims-1] = m_strides[NumDims-1]; | ||||
|       m_offsets[NumDims-1] = startIndicesClamped[NumDims-1]; | ||||
|       Index previousDimProduct = 1; | ||||
|       for (int i = NumDims - 2; i >= 0; --i) { | ||||
|         previousDimProduct *= input_dims[i+1]; | ||||
|         m_inputStrides[i] = previousDimProduct * m_strides[i]; | ||||
|         m_offsets[i] = startIndicesClamped[i] * previousDimProduct; | ||||
|       } | ||||
|  | ||||
|       m_outputStrides[NumDims-1] = 1; | ||||
|       for (int i = NumDims - 2; i >= 0; --i) { | ||||
|         m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1]; | ||||
|         // NOTE: if the tensor is degenerate we pass 1 to keep the TensorIntDivisor constructor from failing on a zero divisor. | ||||
|         m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]); | ||||
|       } | ||||
|     } | ||||
|     m_block_total_size_max = numext::maxi(static_cast<std::size_t>(1), | ||||
|                                           device.lastLevelCacheSize() / | ||||
|                                           sizeof(Scalar)); | ||||
|   } | ||||
|  | ||||
|   typedef typename XprType::Index Index; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename internal::remove_const<Scalar>::type ScalarNonConst; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   typedef Strides Dimensions; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } | ||||
|  | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { | ||||
|     m_impl.evalSubExprsIfNeeded(NULL); | ||||
|     return true; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { | ||||
|     m_impl.cleanup(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const | ||||
|   { | ||||
|     return m_impl.coeff(srcCoeff(index)); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { | ||||
|     return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, NumDims); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const { | ||||
|     return NULL; | ||||
|   } | ||||
|  | ||||
|  protected: | ||||
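|   // Maps a linear output index to a linear input index: every extracted sub-index is | ||||
|   // scaled by the (stride-multiplied, possibly negative) input stride and shifted by the | ||||
|   // pre-flattened clamped start offset stored in m_offsets. | ||||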
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const | ||||
|   { | ||||
|     Index inputIndex = 0; | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       for (int i = NumDims - 1; i >= 0; --i) { | ||||
|         const Index idx = index / m_fastOutputStrides[i]; | ||||
|         inputIndex += idx * m_inputStrides[i] + m_offsets[i]; | ||||
|         index -= idx * m_outputStrides[i]; | ||||
|       } | ||||
|     } else { | ||||
|       for (int i = 0; i < NumDims; ++i) { | ||||
|         const Index idx = index / m_fastOutputStrides[i]; | ||||
|         inputIndex += idx * m_inputStrides[i] + m_offsets[i]; | ||||
|         index -= idx * m_outputStrides[i]; | ||||
|       } | ||||
|     } | ||||
|     return inputIndex; | ||||
|   } | ||||
|  | ||||
|   static EIGEN_STRONG_INLINE Index clamp(Index value, Index min, Index max) { | ||||
|     return numext::maxi(min, numext::mini(max,value)); | ||||
|   } | ||||
|  | ||||
|   array<Index, NumDims> m_outputStrides; | ||||
|   array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides; | ||||
|   array<Index, NumDims> m_inputStrides; | ||||
|   TensorEvaluator<ArgType, Device> m_impl; | ||||
|   const Device& m_device; | ||||
|   DSizes<Index, NumDims> m_startIndices; // clamped startIndices | ||||
|   DSizes<Index, NumDims> m_dimensions; | ||||
|   DSizes<Index, NumDims> m_offsets; // offset in a flattened shape | ||||
|   const Strides m_strides; | ||||
|   std::size_t m_block_total_size_max; | ||||
| }; | ||||
|  | ||||
| // Eval as lvalue | ||||
| template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device> | ||||
| struct TensorEvaluator<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> | ||||
|   : public TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> | ||||
| { | ||||
|   typedef TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> Base; | ||||
|   typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType; | ||||
|   static const int NumDims = internal::array_size<Strides>::value; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = false, | ||||
|     PacketAccess = false, | ||||
|     BlockAccess = false, | ||||
|     Layout = TensorEvaluator<ArgType, Device>::Layout, | ||||
|     CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess, | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) | ||||
|     : Base(op, device) | ||||
|     { } | ||||
|  | ||||
|   typedef typename XprType::Index Index; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename internal::remove_const<Scalar>::type ScalarNonConst; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   typedef Strides Dimensions; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) | ||||
|   { | ||||
|     return this->m_impl.coeffRef(this->srcCoeff(index)); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H | ||||
							
								
								
									
397
external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
vendored
Normal file
									
								
							| @@ -0,0 +1,397 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_PADDING_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_PADDING_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \class TensorPadding | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Tensor padding class. | ||||
|   * At the moment only padding with a constant value is supported. | ||||
|   * | ||||
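|   * Illustrative usage sketch (not part of the original header): | ||||
|   * \code | ||||
|   * Eigen::Tensor<float, 2> input(3, 4); | ||||
|   * input.setRandom(); | ||||
|   * Eigen::array<std::pair<int, int>, 2> paddings; | ||||
|   * paddings[0] = std::make_pair(1, 2);  // 1 value before and 2 after dimension 0 | ||||
|   * paddings[1] = std::make_pair(0, 1);  // 0 values before and 1 after dimension 1 | ||||
|   * Eigen::Tensor<float, 2> zero_padded  = input.pad(paddings);         // pads with 0 | ||||
|   * Eigen::Tensor<float, 2> value_padded = input.pad(paddings, -1.0f);  // pads with -1 | ||||
|   * \endcode | ||||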
|   */ | ||||
| namespace internal { | ||||
| template<typename PaddingDimensions, typename XprType> | ||||
| struct traits<TensorPaddingOp<PaddingDimensions, XprType> > : public traits<XprType> | ||||
| { | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef traits<XprType> XprTraits; | ||||
|   typedef typename XprTraits::StorageKind StorageKind; | ||||
|   typedef typename XprTraits::Index Index; | ||||
|   typedef typename XprType::Nested Nested; | ||||
|   typedef typename remove_reference<Nested>::type _Nested; | ||||
|   static const int NumDimensions = XprTraits::NumDimensions; | ||||
|   static const int Layout = XprTraits::Layout; | ||||
| }; | ||||
|  | ||||
| template<typename PaddingDimensions, typename XprType> | ||||
| struct eval<TensorPaddingOp<PaddingDimensions, XprType>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorPaddingOp<PaddingDimensions, XprType>& type; | ||||
| }; | ||||
|  | ||||
| template<typename PaddingDimensions, typename XprType> | ||||
| struct nested<TensorPaddingOp<PaddingDimensions, XprType>, 1, typename eval<TensorPaddingOp<PaddingDimensions, XprType> >::type> | ||||
| { | ||||
|   typedef TensorPaddingOp<PaddingDimensions, XprType> type; | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
|  | ||||
|  | ||||
| template<typename PaddingDimensions, typename XprType> | ||||
| class TensorPaddingOp : public TensorBase<TensorPaddingOp<PaddingDimensions, XprType>, ReadOnlyAccessors> | ||||
| { | ||||
|   public: | ||||
|   typedef typename Eigen::internal::traits<TensorPaddingOp>::Scalar Scalar; | ||||
|   typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename Eigen::internal::nested<TensorPaddingOp>::type Nested; | ||||
|   typedef typename Eigen::internal::traits<TensorPaddingOp>::StorageKind StorageKind; | ||||
|   typedef typename Eigen::internal::traits<TensorPaddingOp>::Index Index; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorPaddingOp(const XprType& expr, const PaddingDimensions& padding_dims, const Scalar padding_value) | ||||
|       : m_xpr(expr), m_padding_dims(padding_dims), m_padding_value(padding_value) {} | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const PaddingDimensions& padding() const { return m_padding_dims; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     Scalar padding_value() const { return m_padding_value; } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const typename internal::remove_all<typename XprType::Nested>::type& | ||||
|     expression() const { return m_xpr; } | ||||
|  | ||||
|   protected: | ||||
|     typename XprType::Nested m_xpr; | ||||
|     const PaddingDimensions m_padding_dims; | ||||
|     const Scalar m_padding_value; | ||||
| }; | ||||
|  | ||||
|  | ||||
| // Eval as rvalue | ||||
| template<typename PaddingDimensions, typename ArgType, typename Device> | ||||
| struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device> | ||||
| { | ||||
|   typedef TensorPaddingOp<PaddingDimensions, ArgType> XprType; | ||||
|   typedef typename XprType::Index Index; | ||||
|   static const int NumDims = internal::array_size<PaddingDimensions>::value; | ||||
|   typedef DSizes<Index, NumDims> Dimensions; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = true, | ||||
|     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, | ||||
|     Layout = TensorEvaluator<ArgType, Device>::Layout, | ||||
|     CoordAccess = true, | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) | ||||
|       : m_impl(op.expression(), device), m_padding(op.padding()), m_paddingValue(op.padding_value()) | ||||
|   { | ||||
|     // The padding op doesn't change the rank of the tensor. Directly padding a scalar would lead | ||||
|     // to a vector, which doesn't make sense. Instead one should reshape the scalar into a vector | ||||
|     // of 1 element first and then pad. | ||||
|     EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|  | ||||
|     // Compute dimensions | ||||
|     m_dimensions = m_impl.dimensions(); | ||||
|     for (int i = 0; i < NumDims; ++i) { | ||||
|       m_dimensions[i] += m_padding[i].first + m_padding[i].second; | ||||
|     } | ||||
|     const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       m_inputStrides[0] = 1; | ||||
|       m_outputStrides[0] = 1; | ||||
|       for (int i = 1; i < NumDims; ++i) { | ||||
|         m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; | ||||
|         m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; | ||||
|       } | ||||
|       m_outputStrides[NumDims] = m_outputStrides[NumDims-1] * m_dimensions[NumDims-1]; | ||||
|     } else { | ||||
|       m_inputStrides[NumDims - 1] = 1; | ||||
|       m_outputStrides[NumDims] = 1; | ||||
|       for (int i = NumDims - 2; i >= 0; --i) { | ||||
|         m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; | ||||
|         m_outputStrides[i+1] = m_outputStrides[i+2] * m_dimensions[i+1]; | ||||
|       } | ||||
|       m_outputStrides[0] = m_outputStrides[1] * m_dimensions[0]; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { | ||||
|     m_impl.evalSubExprsIfNeeded(NULL); | ||||
|     return true; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { | ||||
|     m_impl.cleanup(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const | ||||
|   { | ||||
|     eigen_assert(index < dimensions().TotalSize()); | ||||
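|     // Peel off the output dimensions one at a time; as soon as the index falls inside a | ||||
|     // left or right padding region the padding value is returned, otherwise the matching | ||||
|     // input coefficient is read. | ||||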
|     Index inputIndex = 0; | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       for (int i = NumDims - 1; i > 0; --i) { | ||||
|         const Index idx = index / m_outputStrides[i]; | ||||
|         if (isPaddingAtIndexForDim(idx, i)) { | ||||
|           return m_paddingValue; | ||||
|         } | ||||
|         inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; | ||||
|         index -= idx * m_outputStrides[i]; | ||||
|       } | ||||
|       if (isPaddingAtIndexForDim(index, 0)) { | ||||
|         return m_paddingValue; | ||||
|       } | ||||
|       inputIndex += (index - m_padding[0].first); | ||||
|     } else { | ||||
|       for (int i = 0; i < NumDims - 1; ++i) { | ||||
|         const Index idx = index / m_outputStrides[i+1]; | ||||
|         if (isPaddingAtIndexForDim(idx, i)) { | ||||
|           return m_paddingValue; | ||||
|         } | ||||
|         inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; | ||||
|         index -= idx * m_outputStrides[i+1]; | ||||
|       } | ||||
|       if (isPaddingAtIndexForDim(index, NumDims-1)) { | ||||
|         return m_paddingValue; | ||||
|       } | ||||
|       inputIndex += (index - m_padding[NumDims-1].first); | ||||
|     } | ||||
|     return m_impl.coeff(inputIndex); | ||||
|   } | ||||
|  | ||||
|   template<int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const | ||||
|   { | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       return packetColMajor(index); | ||||
|     } | ||||
|     return packetRowMajor(index); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { | ||||
|     TensorOpCost cost = m_impl.costPerCoeff(vectorized); | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       for (int i = 0; i < NumDims; ++i) | ||||
|         updateCostPerDimension(cost, i, i == 0); | ||||
|     } else { | ||||
|       for (int i = NumDims - 1; i >= 0; --i) | ||||
|         updateCostPerDimension(cost, i, i == NumDims - 1); | ||||
|     } | ||||
|     return cost; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } | ||||
|  | ||||
|  private: | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isPaddingAtIndexForDim( | ||||
|       Index index, int dim_index) const { | ||||
| #if defined(EIGEN_HAS_INDEX_LIST) | ||||
|     return (!internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0) && | ||||
|             index < m_padding[dim_index].first) || | ||||
|         (!internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0) && | ||||
|          index >= m_dimensions[dim_index] - m_padding[dim_index].second); | ||||
| #else | ||||
|     return (index < m_padding[dim_index].first) || | ||||
|            (index >= m_dimensions[dim_index] - m_padding[dim_index].second); | ||||
| #endif | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isLeftPaddingCompileTimeZero( | ||||
|       int dim_index) const { | ||||
| #if defined(EIGEN_HAS_INDEX_LIST) | ||||
|     return internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0); | ||||
| #else | ||||
|     EIGEN_UNUSED_VARIABLE(dim_index); | ||||
|     return false; | ||||
| #endif | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isRightPaddingCompileTimeZero( | ||||
|       int dim_index) const { | ||||
| #if defined(EIGEN_HAS_INDEX_LIST) | ||||
|     return internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0); | ||||
| #else | ||||
|     EIGEN_UNUSED_VARIABLE(dim_index); | ||||
|     return false; | ||||
| #endif | ||||
|   } | ||||
|  | ||||
|  | ||||
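|   // Scales the per-coefficient cost by the fraction of output coefficients that actually | ||||
|   // read the input (padded coefficients are cheap), then adds the index arithmetic for the | ||||
|   // dimension; the innermost dimension (flagged `first`) needs no division or stride | ||||
|   // multiplication and is therefore cheaper. | ||||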
|   void updateCostPerDimension(TensorOpCost& cost, int i, bool first) const { | ||||
|     const double in = static_cast<double>(m_impl.dimensions()[i]); | ||||
|     const double out = in + m_padding[i].first + m_padding[i].second; | ||||
|     if (out == 0) | ||||
|       return; | ||||
|     const double reduction = in / out; | ||||
|     cost *= reduction; | ||||
|     if (first) { | ||||
|       cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost<Index>() + | ||||
|                     reduction * (1 * TensorOpCost::AddCost<Index>())); | ||||
|     } else { | ||||
|       cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost<Index>() + | ||||
|                                  2 * TensorOpCost::MulCost<Index>() + | ||||
|                     reduction * (2 * TensorOpCost::MulCost<Index>() + | ||||
|                                  1 * TensorOpCost::DivCost<Index>())); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|  protected: | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const | ||||
|   { | ||||
|     EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); | ||||
|  | ||||
|     const Index initialIndex = index; | ||||
|     Index inputIndex = 0; | ||||
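|     // Classify the packet span [index, index+PacketSize-1] for each dimension: entirely | ||||
|     // inside a padding region returns a constant packet, entirely inside the interior peels | ||||
|     // the dimension off the input index, and a span straddling a boundary falls back to | ||||
|     // coefficient-wise loads. | ||||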
|     for (int i = NumDims - 1; i > 0; --i) { | ||||
|       const Index first = index; | ||||
|       const Index last = index + PacketSize - 1; | ||||
|       const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i]; | ||||
|       const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i]; | ||||
|       const Index lastPaddedRight = m_outputStrides[i+1]; | ||||
|  | ||||
|       if (!isLeftPaddingCompileTimeZero(i) && last < lastPaddedLeft) { | ||||
|         // all the coefficients are in the padding zone. | ||||
|         return internal::pset1<PacketReturnType>(m_paddingValue); | ||||
|       } | ||||
|       else if (!isRightPaddingCompileTimeZero(i) && first >= firstPaddedRight && last < lastPaddedRight) { | ||||
|         // all the coefficients are in the padding zone. | ||||
|         return internal::pset1<PacketReturnType>(m_paddingValue); | ||||
|       } | ||||
|       else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (first >= lastPaddedLeft && last < firstPaddedRight)) { | ||||
|         // all the coefficients are between the 2 padding zones. | ||||
|         const Index idx = index / m_outputStrides[i]; | ||||
|         inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; | ||||
|         index -= idx * m_outputStrides[i]; | ||||
|       } | ||||
|       else { | ||||
|         // Every other case | ||||
|         return packetWithPossibleZero(initialIndex); | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     const Index last = index + PacketSize - 1; | ||||
|     const Index first = index; | ||||
|     const Index lastPaddedLeft = m_padding[0].first; | ||||
|     const Index firstPaddedRight = (m_dimensions[0] - m_padding[0].second); | ||||
|     const Index lastPaddedRight = m_outputStrides[1]; | ||||
|  | ||||
|     if (!isLeftPaddingCompileTimeZero(0) && last < lastPaddedLeft) { | ||||
|       // all the coefficients are in the padding zone. | ||||
|       return internal::pset1<PacketReturnType>(m_paddingValue); | ||||
|     } | ||||
|     else if (!isRightPaddingCompileTimeZero(0) && first >= firstPaddedRight && last < lastPaddedRight) { | ||||
|       // all the coefficients are in the padding zone. | ||||
|       return internal::pset1<PacketReturnType>(m_paddingValue); | ||||
|     } | ||||
|     else if ((isLeftPaddingCompileTimeZero(0) && isRightPaddingCompileTimeZero(0)) || (first >= lastPaddedLeft && last < firstPaddedRight)) { | ||||
|       // all the coefficients are between the 2 padding zones. | ||||
|       inputIndex += (index - m_padding[0].first); | ||||
|       return m_impl.template packet<Unaligned>(inputIndex); | ||||
|     } | ||||
|     // Every other case | ||||
|     return packetWithPossibleZero(initialIndex); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const | ||||
|   { | ||||
|     EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); | ||||
|  | ||||
|     const Index initialIndex = index; | ||||
|     Index inputIndex = 0; | ||||
|  | ||||
|     for (int i = 0; i < NumDims - 1; ++i) { | ||||
|       const Index first = index; | ||||
|       const Index last = index + PacketSize - 1; | ||||
|       const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i+1]; | ||||
|       const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i+1]; | ||||
|       const Index lastPaddedRight = m_outputStrides[i]; | ||||
|  | ||||
|       if (!isLeftPaddingCompileTimeZero(i) && last < lastPaddedLeft) { | ||||
|         // all the coefficients are in the padding zone. | ||||
|         return internal::pset1<PacketReturnType>(m_paddingValue); | ||||
|       } | ||||
|       else if (!isRightPaddingCompileTimeZero(i) && first >= firstPaddedRight && last < lastPaddedRight) { | ||||
|         // all the coefficients are in the padding zone. | ||||
|         return internal::pset1<PacketReturnType>(m_paddingValue); | ||||
|       } | ||||
|       else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (first >= lastPaddedLeft && last < firstPaddedRight)) { | ||||
|         // all the coefficients are between the 2 padding zones. | ||||
|         const Index idx = index / m_outputStrides[i+1]; | ||||
|         inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; | ||||
|         index -= idx * m_outputStrides[i+1]; | ||||
|       } | ||||
|       else { | ||||
|         // Every other case | ||||
|         return packetWithPossibleZero(initialIndex); | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     const Index last = index + PacketSize - 1; | ||||
|     const Index first = index; | ||||
|     const Index lastPaddedLeft = m_padding[NumDims-1].first; | ||||
|     const Index firstPaddedRight = (m_dimensions[NumDims-1] - m_padding[NumDims-1].second); | ||||
|     const Index lastPaddedRight = m_outputStrides[NumDims-1]; | ||||
|  | ||||
|     if (!isLeftPaddingCompileTimeZero(NumDims-1) && last < lastPaddedLeft) { | ||||
|       // all the coefficients are in the padding zone. | ||||
|       return internal::pset1<PacketReturnType>(m_paddingValue); | ||||
|     } | ||||
|     else if (!isRightPaddingCompileTimeZero(NumDims-1) && first >= firstPaddedRight && last < lastPaddedRight) { | ||||
|       // all the coefficients are in the padding zone. | ||||
|       return internal::pset1<PacketReturnType>(m_paddingValue); | ||||
|     } | ||||
|     else if ((isLeftPaddingCompileTimeZero(NumDims-1) && isRightPaddingCompileTimeZero(NumDims-1)) || (first >= lastPaddedLeft && last < firstPaddedRight)) { | ||||
|       // all the coefficients are between the 2 padding zones. | ||||
|       inputIndex += (index - m_padding[NumDims-1].first); | ||||
|       return m_impl.template packet<Unaligned>(inputIndex); | ||||
|     } | ||||
|     // Every other case | ||||
|     return packetWithPossibleZero(initialIndex); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const | ||||
|   { | ||||
|     EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; | ||||
|     for (int i = 0; i < PacketSize; ++i) { | ||||
|       values[i] = coeff(index+i); | ||||
|     } | ||||
|     PacketReturnType rslt = internal::pload<PacketReturnType>(values); | ||||
|     return rslt; | ||||
|   } | ||||
|  | ||||
|   Dimensions m_dimensions; | ||||
|   array<Index, NumDims+1> m_outputStrides; | ||||
|   array<Index, NumDims> m_inputStrides; | ||||
|   TensorEvaluator<ArgType, Device> m_impl; | ||||
|   PaddingDimensions m_padding; | ||||
|  | ||||
|   Scalar m_paddingValue; | ||||
| }; | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_PADDING_H | ||||
							
								
								
									
269
external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h
vendored
Normal file
									
								
							| @@ -0,0 +1,269 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_PATCH_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_PATCH_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \class TensorPatch | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Tensor patch class. | ||||
|   * Extracts every patch (sub-tensor) of a given size from a tensor; the result has one | ||||
|   * additional dimension that indexes the extracted patches. | ||||
|   * | ||||
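|   * Illustrative usage sketch (not part of the original header): | ||||
|   * \code | ||||
|   * Eigen::Tensor<float, 2> input(4, 5); | ||||
|   * input.setRandom(); | ||||
|   * Eigen::array<ptrdiff_t, 2> patch_dims; | ||||
|   * patch_dims[0] = 2; | ||||
|   * patch_dims[1] = 2; | ||||
|   * // For the default ColMajor layout the result has dimensions (2, 2, 12), where | ||||
|   * // 12 = (4-2+1) * (5-2+1) is the number of extracted patches. | ||||
|   * Eigen::Tensor<float, 3> patches = input.extract_patches(patch_dims); | ||||
|   * \endcode | ||||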
|   */ | ||||
| namespace internal { | ||||
| template<typename PatchDim, typename XprType> | ||||
| struct traits<TensorPatchOp<PatchDim, XprType> > : public traits<XprType> | ||||
| { | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef traits<XprType> XprTraits; | ||||
|   typedef typename XprTraits::StorageKind StorageKind; | ||||
|   typedef typename XprTraits::Index Index; | ||||
|   typedef typename XprType::Nested Nested; | ||||
|   typedef typename remove_reference<Nested>::type _Nested; | ||||
|   static const int NumDimensions = XprTraits::NumDimensions + 1; | ||||
|   static const int Layout = XprTraits::Layout; | ||||
| }; | ||||
|  | ||||
| template<typename PatchDim, typename XprType> | ||||
| struct eval<TensorPatchOp<PatchDim, XprType>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorPatchOp<PatchDim, XprType>& type; | ||||
| }; | ||||
|  | ||||
| template<typename PatchDim, typename XprType> | ||||
| struct nested<TensorPatchOp<PatchDim, XprType>, 1, typename eval<TensorPatchOp<PatchDim, XprType> >::type> | ||||
| { | ||||
|   typedef TensorPatchOp<PatchDim, XprType> type; | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
|  | ||||
|  | ||||
| template<typename PatchDim, typename XprType> | ||||
| class TensorPatchOp : public TensorBase<TensorPatchOp<PatchDim, XprType>, ReadOnlyAccessors> | ||||
| { | ||||
|   public: | ||||
|   typedef typename Eigen::internal::traits<TensorPatchOp>::Scalar Scalar; | ||||
|   typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename Eigen::internal::nested<TensorPatchOp>::type Nested; | ||||
|   typedef typename Eigen::internal::traits<TensorPatchOp>::StorageKind StorageKind; | ||||
|   typedef typename Eigen::internal::traits<TensorPatchOp>::Index Index; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorPatchOp(const XprType& expr, const PatchDim& patch_dims) | ||||
|       : m_xpr(expr), m_patch_dims(patch_dims) {} | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const PatchDim& patch_dims() const { return m_patch_dims; } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const typename internal::remove_all<typename XprType::Nested>::type& | ||||
|     expression() const { return m_xpr; } | ||||
|  | ||||
|   protected: | ||||
|     typename XprType::Nested m_xpr; | ||||
|     const PatchDim m_patch_dims; | ||||
| }; | ||||
|  | ||||
|  | ||||
| // Eval as rvalue | ||||
| template<typename PatchDim, typename ArgType, typename Device> | ||||
| struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device> | ||||
| { | ||||
|   typedef TensorPatchOp<PatchDim, ArgType> XprType; | ||||
|   typedef typename XprType::Index Index; | ||||
|   static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value + 1; | ||||
|   typedef DSizes<Index, NumDims> Dimensions; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|  | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = false, | ||||
|     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, | ||||
|     Layout = TensorEvaluator<ArgType, Device>::Layout, | ||||
|     CoordAccess = false, | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) | ||||
|       : m_impl(op.expression(), device) | ||||
|   { | ||||
|     Index num_patches = 1; | ||||
|     const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); | ||||
|     const PatchDim& patch_dims = op.patch_dims(); | ||||
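|     // The output keeps the patch extents and adds one dimension that indexes the patches | ||||
|     // (last dimension for ColMajor, first for RowMajor); m_patchStrides are strides over | ||||
|     // the possible patch origins, i.e. (input_dim - patch_dim + 1) positions per dimension. | ||||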
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       for (int i = 0; i < NumDims-1; ++i) { | ||||
|         m_dimensions[i] = patch_dims[i]; | ||||
|         num_patches *= (input_dims[i] - patch_dims[i] + 1); | ||||
|       } | ||||
|       m_dimensions[NumDims-1] = num_patches; | ||||
|  | ||||
|       m_inputStrides[0] = 1; | ||||
|       m_patchStrides[0] = 1; | ||||
|       for (int i = 1; i < NumDims-1; ++i) { | ||||
|         m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; | ||||
|         m_patchStrides[i] = m_patchStrides[i-1] * (input_dims[i-1] - patch_dims[i-1] + 1); | ||||
|       } | ||||
|       m_outputStrides[0] = 1; | ||||
|       for (int i = 1; i < NumDims; ++i) { | ||||
|         m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; | ||||
|       } | ||||
|     } else { | ||||
|       for (int i = 0; i < NumDims-1; ++i) { | ||||
|         m_dimensions[i+1] = patch_dims[i]; | ||||
|         num_patches *= (input_dims[i] - patch_dims[i] + 1); | ||||
|       } | ||||
|       m_dimensions[0] = num_patches; | ||||
|  | ||||
|       m_inputStrides[NumDims-2] = 1; | ||||
|       m_patchStrides[NumDims-2] = 1; | ||||
|       for (int i = NumDims-3; i >= 0; --i) { | ||||
|         m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; | ||||
|         m_patchStrides[i] = m_patchStrides[i+1] * (input_dims[i+1] - patch_dims[i+1] + 1); | ||||
|       } | ||||
|       m_outputStrides[NumDims-1] = 1; | ||||
|       for (int i = NumDims-2; i >= 0; --i) { | ||||
|         m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1]; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { | ||||
|     m_impl.evalSubExprsIfNeeded(NULL); | ||||
|     return true; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { | ||||
|     m_impl.cleanup(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const | ||||
|   { | ||||
|     Index output_stride_index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? NumDims - 1 : 0; | ||||
|     // Find the location of the first element of the patch. | ||||
|     Index patchIndex = index / m_outputStrides[output_stride_index]; | ||||
|     // Find the offset of the element wrt the location of the first element. | ||||
|     Index patchOffset = index - patchIndex * m_outputStrides[output_stride_index]; | ||||
|     Index inputIndex = 0; | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       for (int i = NumDims - 2; i > 0; --i) { | ||||
|         const Index patchIdx = patchIndex / m_patchStrides[i]; | ||||
|         patchIndex -= patchIdx * m_patchStrides[i]; | ||||
|         const Index offsetIdx = patchOffset / m_outputStrides[i]; | ||||
|         patchOffset -= offsetIdx * m_outputStrides[i]; | ||||
|         inputIndex += (patchIdx + offsetIdx) * m_inputStrides[i]; | ||||
|       } | ||||
|     } else { | ||||
|       for (int i = 0; i < NumDims - 2; ++i) { | ||||
|         const Index patchIdx = patchIndex / m_patchStrides[i]; | ||||
|         patchIndex -= patchIdx * m_patchStrides[i]; | ||||
|         const Index offsetIdx = patchOffset / m_outputStrides[i+1]; | ||||
|         patchOffset -= offsetIdx * m_outputStrides[i+1]; | ||||
|         inputIndex += (patchIdx + offsetIdx) * m_inputStrides[i]; | ||||
|       } | ||||
|     } | ||||
|     inputIndex += (patchIndex + patchOffset); | ||||
|     return m_impl.coeff(inputIndex); | ||||
|   } | ||||
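|  | ||||
|   // Illustrative sketch (not part of this file): coeff() above splits the linear | ||||
|   // output index into a patch id and an intra-patch offset, then rebuilds the | ||||
|   // input offset stride by stride. The disabled stand-alone example below mimics | ||||
|   // that arithmetic for a column-major 4x4 matrix and 2x2 patches; all names and | ||||
|   // sizes are made up for the example. | ||||
| #if 0 | ||||
|   #include <cstdio> | ||||
|   int main() { | ||||
|     const int rows = 4, prows = 2, pcols = 2; | ||||
|     const int patches_per_col = 3;               // rows - prows + 1 | ||||
|     const int patches_per_row = 3;               // cols - pcols + 1 | ||||
|     const int patch_size = prows * pcols;        // output stride of the patch index | ||||
|     for (int out = 0; out < patch_size * patches_per_col * patches_per_row; ++out) { | ||||
|       const int patchIndex  = out / patch_size;  // which patch | ||||
|       const int patchOffset = out % patch_size;  // element within the patch | ||||
|       const int pc = patchIndex / patches_per_col, pr = patchIndex % patches_per_col; | ||||
|       const int oc = patchOffset / prows,         orow = patchOffset % prows; | ||||
|       const int inputIndex = (pc + oc) * rows + (pr + orow);  // column-major input offset | ||||
|       std::printf("out=%2d -> input=%2d\n", out, inputIndex); | ||||
|     } | ||||
|   } | ||||
| #endif | ||||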
|  | ||||
|   template<int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const | ||||
|   { | ||||
|     EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); | ||||
|  | ||||
|     Index output_stride_index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? NumDims - 1 : 0; | ||||
|     Index indices[2] = {index, index + PacketSize - 1}; | ||||
|     Index patchIndices[2] = {indices[0] / m_outputStrides[output_stride_index], | ||||
|                              indices[1] / m_outputStrides[output_stride_index]}; | ||||
|     Index patchOffsets[2] = {indices[0] - patchIndices[0] * m_outputStrides[output_stride_index], | ||||
|                              indices[1] - patchIndices[1] * m_outputStrides[output_stride_index]}; | ||||
|  | ||||
|     Index inputIndices[2] = {0, 0}; | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       for (int i = NumDims - 2; i > 0; --i) { | ||||
|         const Index patchIdx[2] = {patchIndices[0] / m_patchStrides[i], | ||||
|                                    patchIndices[1] / m_patchStrides[i]}; | ||||
|         patchIndices[0] -= patchIdx[0] * m_patchStrides[i]; | ||||
|         patchIndices[1] -= patchIdx[1] * m_patchStrides[i]; | ||||
|  | ||||
|         const Index offsetIdx[2] = {patchOffsets[0] / m_outputStrides[i], | ||||
|                                     patchOffsets[1] / m_outputStrides[i]}; | ||||
|         patchOffsets[0] -= offsetIdx[0] * m_outputStrides[i]; | ||||
|         patchOffsets[1] -= offsetIdx[1] * m_outputStrides[i]; | ||||
|  | ||||
|         inputIndices[0] += (patchIdx[0] + offsetIdx[0]) * m_inputStrides[i]; | ||||
|         inputIndices[1] += (patchIdx[1] + offsetIdx[1]) * m_inputStrides[i]; | ||||
|       } | ||||
|     } else { | ||||
|       for (int i = 0; i < NumDims - 2; ++i) { | ||||
|         const Index patchIdx[2] = {patchIndices[0] / m_patchStrides[i], | ||||
|                                    patchIndices[1] / m_patchStrides[i]}; | ||||
|         patchIndices[0] -= patchIdx[0] * m_patchStrides[i]; | ||||
|         patchIndices[1] -= patchIdx[1] * m_patchStrides[i]; | ||||
|  | ||||
|         const Index offsetIdx[2] = {patchOffsets[0] / m_outputStrides[i+1], | ||||
|                                     patchOffsets[1] / m_outputStrides[i+1]}; | ||||
|         patchOffsets[0] -= offsetIdx[0] * m_outputStrides[i+1]; | ||||
|         patchOffsets[1] -= offsetIdx[1] * m_outputStrides[i+1]; | ||||
|  | ||||
|         inputIndices[0] += (patchIdx[0] + offsetIdx[0]) * m_inputStrides[i]; | ||||
|         inputIndices[1] += (patchIdx[1] + offsetIdx[1]) * m_inputStrides[i]; | ||||
|       } | ||||
|     } | ||||
|     inputIndices[0] += (patchIndices[0] + patchOffsets[0]); | ||||
|     inputIndices[1] += (patchIndices[1] + patchOffsets[1]); | ||||
|  | ||||
|     if (inputIndices[1] - inputIndices[0] == PacketSize - 1) { | ||||
|       PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]); | ||||
|       return rslt; | ||||
|     } | ||||
|     else { | ||||
|       EIGEN_ALIGN_MAX CoeffReturnType values[PacketSize]; | ||||
|       values[0] = m_impl.coeff(inputIndices[0]); | ||||
|       values[PacketSize-1] = m_impl.coeff(inputIndices[1]); | ||||
|       for (int i = 1; i < PacketSize-1; ++i) { | ||||
|         values[i] = coeff(index+i); | ||||
|       } | ||||
|       PacketReturnType rslt = internal::pload<PacketReturnType>(values); | ||||
|       return rslt; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { | ||||
|     const double compute_cost = NumDims * (TensorOpCost::DivCost<Index>() + | ||||
|                                            TensorOpCost::MulCost<Index>() + | ||||
|                                            2 * TensorOpCost::AddCost<Index>()); | ||||
|     return m_impl.costPerCoeff(vectorized) + | ||||
|            TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } | ||||
|  | ||||
|  protected: | ||||
|   Dimensions m_dimensions; | ||||
|   array<Index, NumDims> m_outputStrides; | ||||
|   array<Index, NumDims-1> m_inputStrides; | ||||
|   array<Index, NumDims-1> m_patchStrides; | ||||
|  | ||||
|   TensorEvaluator<ArgType, Device> m_impl; | ||||
| }; | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_PATCH_H | ||||
							
								
								
									
										276		external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h		vendored		Normal file
							| @@ -0,0 +1,276 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H | ||||
|  | ||||
| namespace Eigen { | ||||
| namespace internal { | ||||
|  | ||||
| namespace { | ||||
|  | ||||
| EIGEN_DEVICE_FUNC uint64_t get_random_seed() { | ||||
| #ifdef __CUDA_ARCH__ | ||||
|   // We don't support 3D kernels since we currently only use 1D and | ||||
|   // 2D kernels. | ||||
|   assert(threadIdx.z == 0); | ||||
|   return clock64() + | ||||
|       blockIdx.x * blockDim.x + threadIdx.x + | ||||
|       gridDim.x * blockDim.x * (blockIdx.y * blockDim.y + threadIdx.y); | ||||
|  | ||||
| #elif defined _WIN32 | ||||
|   // Use the current time as a baseline. | ||||
|   SYSTEMTIME st; | ||||
|   GetSystemTime(&st); | ||||
|   int time = st.wSecond + 1000 * st.wMilliseconds; | ||||
|   // Mix in a random number to make sure that we get different seeds if | ||||
|   // we try to generate seeds faster than the clock resolution. | ||||
|   // We need 2 random values since the generator only generates 16 bits at | ||||
|   // a time (https://msdn.microsoft.com/en-us/library/398ax69y.aspx) | ||||
|   int rnd1 = ::rand(); | ||||
|   int rnd2 = ::rand(); | ||||
|   uint64_t rnd = (rnd1 | rnd2 << 16) ^ time; | ||||
|   return rnd; | ||||
|  | ||||
| #elif defined __APPLE__ | ||||
|   // Same approach as for win32, except that the random number generator | ||||
|   // is better (see https://developer.apple.com/legacy/library/documentation/Darwin/Reference/ManPages/man3/random.3.html#//apple_ref/doc/man/3/random). | ||||
|   uint64_t rnd = ::random() ^ mach_absolute_time(); | ||||
|   return rnd; | ||||
|  | ||||
| #else | ||||
|   // Augment the current time with pseudo random number generation | ||||
|   // to ensure that we get different seeds if we try to generate seeds | ||||
|   // faster than the clock resolution. | ||||
|   timespec ts; | ||||
|   clock_gettime(CLOCK_REALTIME, &ts); | ||||
|   uint64_t rnd = ::random() ^ ts.tv_nsec; | ||||
|   return rnd; | ||||
| #endif | ||||
| } | ||||
|  | ||||
| static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned PCG_XSH_RS_generator(uint64_t* state) { | ||||
|   // TODO: Unify with the implementation in the non-blocking thread pool. | ||||
|   uint64_t current = *state; | ||||
|   // Update the internal state | ||||
|   *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL; | ||||
|   // Generate the random output (using the PCG-XSH-RS scheme) | ||||
|   return static_cast<unsigned>((current ^ (current >> 22)) >> (22 + (current >> 61))); | ||||
| } | ||||
|  | ||||
| static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE uint64_t PCG_XSH_RS_state(uint64_t seed) { | ||||
|   seed = seed ? seed : get_random_seed(); | ||||
|   return seed * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL; | ||||
| } | ||||
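|  | ||||
| // Illustrative sketch (not part of this file): the two helpers above are meant | ||||
| // to be used together -- seed once via PCG_XSH_RS_state() (a zero seed falls | ||||
| // back to get_random_seed()), then advance the 64-bit LCG state on every draw | ||||
| // and hash it down to 32 output bits. The disabled stand-alone copy below | ||||
| // shows the resulting stream for a fixed seed. | ||||
| #if 0 | ||||
|   #include <cstdint> | ||||
|   #include <cstdio> | ||||
|   static unsigned pcg_xsh_rs(uint64_t* state) { | ||||
|     const uint64_t current = *state; | ||||
|     // LCG state update, identical constants to the code above. | ||||
|     *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL; | ||||
|     // XSH-RS output: xorshift, then a right shift by a state-dependent amount. | ||||
|     return static_cast<unsigned>((current ^ (current >> 22)) >> (22 + (current >> 61))); | ||||
|   } | ||||
|   int main() { | ||||
|     uint64_t state = 42u * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL;  // PCG_XSH_RS_state(42) | ||||
|     for (int i = 0; i < 4; ++i) std::printf("%u\n", pcg_xsh_rs(&state)); | ||||
|   } | ||||
| #endif | ||||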
|  | ||||
| }  // namespace | ||||
|  | ||||
|  | ||||
| template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
| T RandomToTypeUniform(uint64_t* state) { | ||||
|   unsigned rnd = PCG_XSH_RS_generator(state); | ||||
|   return static_cast<T>(rnd); | ||||
| } | ||||
|  | ||||
|  | ||||
| template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
| Eigen::half RandomToTypeUniform<Eigen::half>(uint64_t* state) { | ||||
|   Eigen::half result; | ||||
|   // Generate 10 random bits for the mantissa | ||||
|   unsigned rnd = PCG_XSH_RS_generator(state); | ||||
|   result.x = static_cast<uint16_t>(rnd & 0x3ffu); | ||||
|   // Set the exponent | ||||
|   result.x |= (static_cast<uint16_t>(15) << 10); | ||||
|   // Return the final result | ||||
|   return result - Eigen::half(1.0f); | ||||
| } | ||||
|  | ||||
|  | ||||
| template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
| float RandomToTypeUniform<float>(uint64_t* state) { | ||||
|   typedef union { | ||||
|     uint32_t raw; | ||||
|     float fp; | ||||
|   } internal; | ||||
|   internal result; | ||||
|   // Generate 23 random bits for the mantissa | ||||
|   const unsigned rnd = PCG_XSH_RS_generator(state); | ||||
|   result.raw = rnd & 0x7fffffu; | ||||
|   // Set the exponent | ||||
|   result.raw |= (static_cast<uint32_t>(127) << 23); | ||||
|   // Return the final result | ||||
|   return result.fp - 1.0f; | ||||
| } | ||||
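|  | ||||
| // The union above builds a float in [1,2): 23 random mantissa bits with the | ||||
| // exponent pinned to 127 (the bias for 2^0), and subtracting 1.0f maps the | ||||
| // result to [0,1). Illustrative sketch (not part of this file) of the same | ||||
| // idea using memcpy instead of a union: | ||||
| #if 0 | ||||
|   #include <cstdint> | ||||
|   #include <cstring> | ||||
|   static float bits_to_unit_float(uint32_t rnd) { | ||||
|     uint32_t raw = (rnd & 0x7fffffu) | (127u << 23);  // sign 0, exponent 127, random mantissa | ||||
|     float fp; | ||||
|     std::memcpy(&fp, &raw, sizeof(fp));               // bit pattern of a value in [1, 2) | ||||
|     return fp - 1.0f;                                 // shift to [0, 1) | ||||
|   } | ||||
| #endif | ||||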
|  | ||||
| template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
| double RandomToTypeUniform<double>(uint64_t* state) { | ||||
|   typedef union { | ||||
|     uint64_t raw; | ||||
|     double dp; | ||||
|   } internal; | ||||
|   internal result; | ||||
|   result.raw = 0; | ||||
|   // Generate 52 random bits for the mantissa | ||||
|   // First generate the upper 20 bits | ||||
|   unsigned rnd1 = PCG_XSH_RS_generator(state) & 0xfffffu; | ||||
|   // Then generate the lower 32 bits | ||||
|   unsigned rnd2 = PCG_XSH_RS_generator(state); | ||||
|   result.raw = (static_cast<uint64_t>(rnd1) << 32) | rnd2; | ||||
|   // Set the exponent | ||||
|   result.raw |= (static_cast<uint64_t>(1023) << 52); | ||||
|   // Return the final result | ||||
|   return result.dp - 1.0; | ||||
| } | ||||
|  | ||||
| template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
| std::complex<float> RandomToTypeUniform<std::complex<float> >(uint64_t* state) { | ||||
|   return std::complex<float>(RandomToTypeUniform<float>(state), | ||||
|                              RandomToTypeUniform<float>(state)); | ||||
| } | ||||
| template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
| std::complex<double> RandomToTypeUniform<std::complex<double> >(uint64_t* state) { | ||||
|   return std::complex<double>(RandomToTypeUniform<double>(state), | ||||
|                               RandomToTypeUniform<double>(state)); | ||||
| } | ||||
|  | ||||
| template <typename T> class UniformRandomGenerator { | ||||
|  public: | ||||
|   static const bool PacketAccess = true; | ||||
|  | ||||
|   // Uses the given "seed" if non-zero, otherwise uses a random seed. | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator( | ||||
|       uint64_t seed = 0) { | ||||
|     m_state = PCG_XSH_RS_state(seed); | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator( | ||||
|       const UniformRandomGenerator& other) { | ||||
|     m_state = other.m_state; | ||||
|   } | ||||
|  | ||||
|   template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   T operator()(Index i) const { | ||||
|     uint64_t local_state = m_state + i; | ||||
|     T result = RandomToTypeUniform<T>(&local_state); | ||||
|     m_state = local_state; | ||||
|     return result; | ||||
|   } | ||||
|  | ||||
|   template<typename Packet, typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   Packet packetOp(Index i) const { | ||||
|     const int packetSize = internal::unpacket_traits<Packet>::size; | ||||
|     EIGEN_ALIGN_MAX T values[packetSize]; | ||||
|     uint64_t local_state = m_state + i; | ||||
|     for (int j = 0; j < packetSize; ++j) { | ||||
|       values[j] = RandomToTypeUniform<T>(&local_state); | ||||
|     } | ||||
|     m_state = local_state; | ||||
|     return internal::pload<Packet>(values); | ||||
|   } | ||||
|  | ||||
|  private: | ||||
|   mutable uint64_t m_state; | ||||
| }; | ||||
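|  | ||||
| // Illustrative usage sketch (not part of this file): this generator is usually | ||||
| // reached through the tensor expression API rather than instantiated directly. | ||||
| // Assuming the TensorBase::setRandom()/random() entry points, a typical use | ||||
| // looks like the disabled example below. | ||||
| #if 0 | ||||
|   #include <unsupported/Eigen/CXX11/Tensor> | ||||
|   int main() { | ||||
|     Eigen::Tensor<float, 2> t(4, 4); | ||||
|     t.setRandom();  // fills t with uniform values via the default generator | ||||
|     Eigen::Tensor<float, 2> u(4, 4); | ||||
|     // Explicit generator with a fixed seed for a reproducible stream. | ||||
|     u = t.random(Eigen::internal::UniformRandomGenerator<float>(123)); | ||||
|   } | ||||
| #endif | ||||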
|  | ||||
| template <typename Scalar> | ||||
| struct functor_traits<UniformRandomGenerator<Scalar> > { | ||||
|   enum { | ||||
|     // Rough estimate for floating point, multiplied by ceil(sizeof(T) / sizeof(float)). | ||||
|     Cost = 12 * NumTraits<Scalar>::AddCost * | ||||
|            ((sizeof(Scalar) + sizeof(float) - 1) / sizeof(float)), | ||||
|     PacketAccess = UniformRandomGenerator<Scalar>::PacketAccess | ||||
|   }; | ||||
| }; | ||||
|  | ||||
|  | ||||
|  | ||||
| template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
| T RandomToTypeNormal(uint64_t* state) { | ||||
|   // Use the ratio-of-uniforms method to generate numbers following a normal | ||||
|   // distribution. See for example Numerical Recipes chapter 7.3.9 for the | ||||
|   // details. | ||||
|   T u, v, q; | ||||
|   do { | ||||
|     u = RandomToTypeUniform<T>(state); | ||||
|     v = T(1.7156) * (RandomToTypeUniform<T>(state) - T(0.5)); | ||||
|     const T x = u - T(0.449871); | ||||
|     const T y = numext::abs(v) + T(0.386595); | ||||
|     q = x*x + y * (T(0.196)*y - T(0.25472)*x); | ||||
|   } while (q > T(0.27597) && | ||||
|            (q > T(0.27846) || v*v > T(-4) * numext::log(u) * u*u)); | ||||
|  | ||||
|   return v/u; | ||||
| } | ||||
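|  | ||||
| // Illustrative sketch (not part of this file): the loop above is a | ||||
| // ratio-of-uniforms rejection sampler -- the quick bounds 0.27597 / 0.27846 | ||||
| // accept or reject most candidates without evaluating the log, and v/u is | ||||
| // returned as a standard normal deviate. The disabled stand-alone check below | ||||
| // draws many samples with the same constants (using std::rand purely as a | ||||
| // stand-in uniform source) and prints the sample mean and variance, which | ||||
| // should come out near 0 and 1. | ||||
| #if 0 | ||||
|   #include <cmath> | ||||
|   #include <cstdio> | ||||
|   #include <cstdlib> | ||||
|   static double unit_uniform() { return (std::rand() + 0.5) / (RAND_MAX + 1.0); } | ||||
|   static double normal_sample() { | ||||
|     double u, v, q; | ||||
|     do { | ||||
|       u = unit_uniform(); | ||||
|       v = 1.7156 * (unit_uniform() - 0.5); | ||||
|       const double x = u - 0.449871, y = std::fabs(v) + 0.386595; | ||||
|       q = x * x + y * (0.196 * y - 0.25472 * x); | ||||
|     } while (q > 0.27597 && (q > 0.27846 || v * v > -4.0 * std::log(u) * u * u)); | ||||
|     return v / u; | ||||
|   } | ||||
|   int main() { | ||||
|     double sum = 0, sum2 = 0; | ||||
|     const int n = 1000000; | ||||
|     for (int i = 0; i < n; ++i) { const double s = normal_sample(); sum += s; sum2 += s * s; } | ||||
|     std::printf("mean=%f  var=%f\n", sum / n, sum2 / n - (sum / n) * (sum / n)); | ||||
|   } | ||||
| #endif | ||||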
|  | ||||
| template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
| std::complex<float> RandomToTypeNormal<std::complex<float> >(uint64_t* state) { | ||||
|   return std::complex<float>(RandomToTypeNormal<float>(state), | ||||
|                              RandomToTypeNormal<float>(state)); | ||||
| } | ||||
| template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
| std::complex<double> RandomToTypeNormal<std::complex<double> >(uint64_t* state) { | ||||
|   return std::complex<double>(RandomToTypeNormal<double>(state), | ||||
|                               RandomToTypeNormal<double>(state)); | ||||
| } | ||||
|  | ||||
|  | ||||
| template <typename T> class NormalRandomGenerator { | ||||
|  public: | ||||
|   static const bool PacketAccess = true; | ||||
|  | ||||
|   // Uses the given "seed" if non-zero, otherwise uses a random seed. | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(uint64_t seed = 0) { | ||||
|     m_state = PCG_XSH_RS_state(seed); | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator( | ||||
|       const NormalRandomGenerator& other) { | ||||
|     m_state = other.m_state; | ||||
|   } | ||||
|  | ||||
|   template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   T operator()(Index i) const { | ||||
|     uint64_t local_state = m_state + i; | ||||
|     T result = RandomToTypeNormal<T>(&local_state); | ||||
|     m_state = local_state; | ||||
|     return result; | ||||
|   } | ||||
|  | ||||
|   template<typename Packet, typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   Packet packetOp(Index i) const { | ||||
|     const int packetSize = internal::unpacket_traits<Packet>::size; | ||||
|     EIGEN_ALIGN_MAX T values[packetSize]; | ||||
|     uint64_t local_state = m_state + i; | ||||
|     for (int j = 0; j < packetSize; ++j) { | ||||
|       values[j] = RandomToTypeNormal<T>(&local_state); | ||||
|     } | ||||
|     m_state = local_state; | ||||
|     return internal::pload<Packet>(values); | ||||
|   } | ||||
|  | ||||
|  private: | ||||
|   mutable uint64_t m_state; | ||||
| }; | ||||
|  | ||||
|  | ||||
| template <typename Scalar> | ||||
| struct functor_traits<NormalRandomGenerator<Scalar> > { | ||||
|   enum { | ||||
|     // On average, we need to generate about 3 random numbers, | ||||
|     // costing roughly 15 mul, 8 add and 1.5 log evaluations. | ||||
|     Cost = 3 * functor_traits<UniformRandomGenerator<Scalar> >::Cost + | ||||
|            15 * NumTraits<Scalar>::MulCost + 8 * NumTraits<Scalar>::AddCost + | ||||
|            3 * functor_traits<scalar_log_op<Scalar> >::Cost / 2, | ||||
|     PacketAccess = NormalRandomGenerator<Scalar>::PacketAccess | ||||
|   }; | ||||
| }; | ||||
|  | ||||
|  | ||||
| } // end namespace internal | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H | ||||
							
								
								
									
										781		external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h		vendored		Normal file
							| @@ -0,0 +1,781 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // Copyright (C) 2016 Mehdi Goli, Codeplay Software Ltd <eigen@codeplay.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \class TensorReduction | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Tensor reduction class. | ||||
|   * | ||||
|   */ | ||||
|  | ||||
| namespace internal { | ||||
|   template<typename Op, typename Dims, typename XprType, template <class> class MakePointer_> | ||||
|   struct traits<TensorReductionOp<Op, Dims, XprType, MakePointer_> > | ||||
|  : traits<XprType> | ||||
| { | ||||
|   typedef traits<XprType> XprTraits; | ||||
|   typedef typename XprTraits::Scalar Scalar; | ||||
|   typedef typename XprTraits::StorageKind StorageKind; | ||||
|   typedef typename XprTraits::Index Index; | ||||
|   typedef typename XprType::Nested Nested; | ||||
|   static const int NumDimensions = XprTraits::NumDimensions - array_size<Dims>::value; | ||||
|   static const int Layout = XprTraits::Layout; | ||||
|  | ||||
|   template <class T> struct MakePointer { | ||||
|     // Intermediate typedef to workaround MSVC issue. | ||||
|     typedef MakePointer_<T> MakePointerT; | ||||
|     typedef typename MakePointerT::Type Type; | ||||
|   }; | ||||
| }; | ||||
|  | ||||
| template<typename Op, typename Dims, typename XprType, template <class> class MakePointer_> | ||||
| struct eval<TensorReductionOp<Op, Dims, XprType, MakePointer_>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorReductionOp<Op, Dims, XprType, MakePointer_>& type; | ||||
| }; | ||||
|  | ||||
| template<typename Op, typename Dims, typename XprType, template <class> class MakePointer_> | ||||
| struct nested<TensorReductionOp<Op, Dims, XprType, MakePointer_>, 1, typename eval<TensorReductionOp<Op, Dims, XprType, MakePointer_> >::type> | ||||
| { | ||||
|   typedef TensorReductionOp<Op, Dims, XprType, MakePointer_> type; | ||||
| }; | ||||
|  | ||||
|  | ||||
| template <typename OutputDims> struct DimInitializer { | ||||
|   template <typename InputDims, typename ReducedDims> EIGEN_DEVICE_FUNC | ||||
|   static void run(const InputDims& input_dims, | ||||
|                   const array<bool, internal::array_size<InputDims>::value>& reduced, | ||||
|                   OutputDims* output_dims, ReducedDims* reduced_dims) { | ||||
|     const int NumInputDims = internal::array_size<InputDims>::value; | ||||
|     int outputIndex = 0; | ||||
|     int reduceIndex = 0; | ||||
|     for (int i = 0; i < NumInputDims; ++i) { | ||||
|       if (reduced[i]) { | ||||
|         (*reduced_dims)[reduceIndex] = input_dims[i]; | ||||
|         ++reduceIndex; | ||||
|       } else { | ||||
|         (*output_dims)[outputIndex] = input_dims[i]; | ||||
|         ++outputIndex; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <> struct DimInitializer<Sizes<> > { | ||||
|   template <typename InputDims, typename Index, size_t Rank> EIGEN_DEVICE_FUNC | ||||
|   static void run(const InputDims& input_dims, const array<bool, Rank>&, | ||||
|                   Sizes<>*, array<Index, Rank>* reduced_dims) { | ||||
|     const int NumInputDims = internal::array_size<InputDims>::value; | ||||
|     for (int i = 0; i < NumInputDims; ++i) { | ||||
|       (*reduced_dims)[i] = input_dims[i]; | ||||
|     } | ||||
|   } | ||||
| }; | ||||
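|  | ||||
| // Illustrative sketch (not part of this file): DimInitializer just partitions | ||||
| // the input dimensions according to the `reduced` bitmap. For a 4x5x6 input | ||||
| // with dimension 1 reduced, output_dims becomes {4, 6} and reduced_dims {5}, | ||||
| // as the disabled stand-alone example below spells out. | ||||
| #if 0 | ||||
|   #include <cstdio> | ||||
|   int main() { | ||||
|     const int  input_dims[3] = {4, 5, 6}; | ||||
|     const bool reduced[3]    = {false, true, false}; | ||||
|     int output_dims[2], reduced_dims[1]; | ||||
|     int o = 0, r = 0; | ||||
|     for (int i = 0; i < 3; ++i) { | ||||
|       if (reduced[i]) reduced_dims[r++] = input_dims[i]; | ||||
|       else            output_dims[o++]  = input_dims[i]; | ||||
|     } | ||||
|     std::printf("output = {%d, %d}, reduced = {%d}\n", | ||||
|                 output_dims[0], output_dims[1], reduced_dims[0]); | ||||
|   } | ||||
| #endif | ||||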
|  | ||||
|  | ||||
| template <typename ReducedDims, int NumTensorDims, int Layout> | ||||
| struct are_inner_most_dims { | ||||
|   static const bool value = false; | ||||
| }; | ||||
| template <typename ReducedDims, int NumTensorDims, int Layout> | ||||
| struct preserve_inner_most_dims { | ||||
|   static const bool value = false; | ||||
| }; | ||||
|  | ||||
| #if EIGEN_HAS_CONSTEXPR && EIGEN_HAS_VARIADIC_TEMPLATES | ||||
| template <typename ReducedDims, int NumTensorDims> | ||||
| struct are_inner_most_dims<ReducedDims, NumTensorDims, ColMajor>{ | ||||
|   static const bool tmp1 = indices_statically_known_to_increase<ReducedDims>(); | ||||
|   static const bool tmp2 = index_statically_eq<ReducedDims>(0, 0); | ||||
|   static const bool tmp3 = index_statically_eq<ReducedDims>(array_size<ReducedDims>::value-1, array_size<ReducedDims>::value-1); | ||||
|   static const bool value = tmp1 & tmp2 & tmp3; | ||||
| }; | ||||
| template <typename ReducedDims, int NumTensorDims> | ||||
| struct are_inner_most_dims<ReducedDims, NumTensorDims, RowMajor>{ | ||||
|   static const bool tmp1 = indices_statically_known_to_increase<ReducedDims>(); | ||||
|   static const bool tmp2 = index_statically_eq<ReducedDims>(0, NumTensorDims - array_size<ReducedDims>::value); | ||||
|   static const bool tmp3 = index_statically_eq<ReducedDims>(array_size<ReducedDims>::value - 1, NumTensorDims - 1); | ||||
|   static const bool value = tmp1 & tmp2 & tmp3; | ||||
|  | ||||
| }; | ||||
| template <typename ReducedDims, int NumTensorDims> | ||||
| struct preserve_inner_most_dims<ReducedDims, NumTensorDims, ColMajor>{ | ||||
|   static const bool tmp1 = indices_statically_known_to_increase<ReducedDims>(); | ||||
|   static const bool tmp2 = index_statically_gt<ReducedDims>(0, 0); | ||||
|   static const bool value = tmp1 & tmp2; | ||||
|  | ||||
| }; | ||||
| template <typename ReducedDims, int NumTensorDims> | ||||
| struct preserve_inner_most_dims<ReducedDims, NumTensorDims, RowMajor>{ | ||||
|   static const bool tmp1 = indices_statically_known_to_increase<ReducedDims>(); | ||||
|   static const bool tmp2 = index_statically_lt<ReducedDims>(array_size<ReducedDims>::value - 1, NumTensorDims - 1); | ||||
|   static const bool value = tmp1 & tmp2; | ||||
| }; | ||||
| #endif | ||||
|  | ||||
|  | ||||
| template <int DimIndex, typename Self, typename Op> | ||||
| struct GenericDimReducer { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::CoeffReturnType* accum) { | ||||
|     EIGEN_STATIC_ASSERT((DimIndex > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|     for (int j = 0; j < self.m_reducedDims[DimIndex]; ++j) { | ||||
|       const typename Self::Index input = firstIndex + j * self.m_reducedStrides[DimIndex]; | ||||
|       GenericDimReducer<DimIndex-1, Self, Op>::reduce(self, input, reducer, accum); | ||||
|     } | ||||
|   } | ||||
| }; | ||||
| template <typename Self, typename Op> | ||||
| struct GenericDimReducer<0, Self, Op> { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::CoeffReturnType* accum) { | ||||
|     for (int j = 0; j < self.m_reducedDims[0]; ++j) { | ||||
|       const typename Self::Index input = firstIndex + j * self.m_reducedStrides[0]; | ||||
|       reducer.reduce(self.m_impl.coeff(input), accum); | ||||
|     } | ||||
|   } | ||||
| }; | ||||
| template <typename Self, typename Op> | ||||
| struct GenericDimReducer<-1, Self, Op> { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index index, Op& reducer, typename Self::CoeffReturnType* accum) { | ||||
|     reducer.reduce(self.m_impl.coeff(index), accum); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename Self, typename Op, bool Vectorizable = (Self::InputPacketAccess & Op::PacketAccess)> | ||||
| struct InnerMostDimReducer { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer) { | ||||
|     typename Self::CoeffReturnType accum = reducer.initialize(); | ||||
|     for (typename Self::Index j = 0; j < numValuesToReduce; ++j) { | ||||
|       reducer.reduce(self.m_impl.coeff(firstIndex + j), &accum); | ||||
|     } | ||||
|     return reducer.finalize(accum); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename Self, typename Op> | ||||
| struct InnerMostDimReducer<Self, Op, true> { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer) { | ||||
|     const int packetSize = internal::unpacket_traits<typename Self::PacketReturnType>::size; | ||||
|     const typename Self::Index VectorizedSize = (numValuesToReduce / packetSize) * packetSize; | ||||
|     typename Self::PacketReturnType p = reducer.template initializePacket<typename Self::PacketReturnType>(); | ||||
|     for (typename Self::Index j = 0; j < VectorizedSize; j += packetSize) { | ||||
|       reducer.reducePacket(self.m_impl.template packet<Unaligned>(firstIndex + j), &p); | ||||
|     } | ||||
|     typename Self::CoeffReturnType accum = reducer.initialize(); | ||||
|     for (typename Self::Index j = VectorizedSize; j < numValuesToReduce; ++j) { | ||||
|       reducer.reduce(self.m_impl.coeff(firstIndex + j), &accum); | ||||
|     } | ||||
|     return reducer.finalizeBoth(accum, p); | ||||
|   } | ||||
| }; | ||||
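|  | ||||
| // Illustrative sketch (not part of this file): the vectorized reducer above | ||||
| // accumulates full packets first, handles the scalar tail separately, and then | ||||
| // folds the two accumulators (finalizeBoth). The disabled example below writes | ||||
| // that pattern out for a plain sum with a hypothetical packet width of 4. | ||||
| #if 0 | ||||
|   static float sum_with_tail(const float* data, int n) { | ||||
|     const int packet = 4;                               // assumed packet width | ||||
|     const int vec_n = (n / packet) * packet;            // largest multiple of the packet size | ||||
|     float pacc[4] = {0.f, 0.f, 0.f, 0.f};               // packet accumulator | ||||
|     for (int j = 0; j < vec_n; j += packet) | ||||
|       for (int k = 0; k < packet; ++k) pacc[k] += data[j + k]; | ||||
|     float acc = 0.f;                                    // scalar accumulator for the tail | ||||
|     for (int j = vec_n; j < n; ++j) acc += data[j]; | ||||
|     return acc + pacc[0] + pacc[1] + pacc[2] + pacc[3]; // fold both accumulators | ||||
|   } | ||||
| #endif | ||||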
|  | ||||
| template <int DimIndex, typename Self, typename Op, bool vectorizable = (Self::InputPacketAccess & Op::PacketAccess)> | ||||
| struct InnerMostDimPreserver { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self&, typename Self::Index, Op&, typename Self::PacketReturnType*) { | ||||
|     eigen_assert(false && "should never be called"); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <int DimIndex, typename Self, typename Op> | ||||
| struct InnerMostDimPreserver<DimIndex, Self, Op, true> { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::PacketReturnType* accum) { | ||||
|     EIGEN_STATIC_ASSERT((DimIndex > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|     for (typename Self::Index j = 0; j < self.m_reducedDims[DimIndex]; ++j) { | ||||
|       const typename Self::Index input = firstIndex + j * self.m_reducedStrides[DimIndex]; | ||||
|       InnerMostDimPreserver<DimIndex-1, Self, Op>::reduce(self, input, reducer, accum); | ||||
|     } | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename Self, typename Op> | ||||
| struct InnerMostDimPreserver<0, Self, Op, true> { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::PacketReturnType* accum) { | ||||
|     for (typename Self::Index j = 0; j < self.m_reducedDims[0]; ++j) { | ||||
|       const typename Self::Index input = firstIndex + j * self.m_reducedStrides[0]; | ||||
|       reducer.reducePacket(self.m_impl.template packet<Unaligned>(input), accum); | ||||
|     } | ||||
|   } | ||||
| }; | ||||
| template <typename Self, typename Op> | ||||
| struct InnerMostDimPreserver<-1, Self, Op, true> { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self&, typename Self::Index, Op&, typename Self::PacketReturnType*) { | ||||
|     eigen_assert(false && "should never be called"); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| // Default full reducer | ||||
| template <typename Self, typename Op, typename Device, bool Vectorizable = (Self::InputPacketAccess & Op::PacketAccess)> | ||||
| struct FullReducer { | ||||
|   static const bool HasOptimizedImplementation = false; | ||||
|  | ||||
|   static EIGEN_DEVICE_FUNC void run(const Self& self, Op& reducer, const Device&, typename Self::CoeffReturnType* output) { | ||||
|     const typename Self::Index num_coeffs = array_prod(self.m_impl.dimensions()); | ||||
|     *output = InnerMostDimReducer<Self, Op, Vectorizable>::reduce(self, 0, num_coeffs, reducer); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| #ifdef EIGEN_USE_THREADS | ||||
| // Multithreaded full reducers | ||||
| template <typename Self, typename Op, | ||||
|           bool Vectorizable = (Self::InputPacketAccess & Op::PacketAccess)> | ||||
| struct FullReducerShard { | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Self& self, typename Self::Index firstIndex, | ||||
|                   typename Self::Index numValuesToReduce, Op& reducer, | ||||
|                   typename Self::CoeffReturnType* output) { | ||||
|     *output = InnerMostDimReducer<Self, Op, Vectorizable>::reduce( | ||||
|         self, firstIndex, numValuesToReduce, reducer); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| // Multithreaded full reducer | ||||
| template <typename Self, typename Op, bool Vectorizable> | ||||
| struct FullReducer<Self, Op, ThreadPoolDevice, Vectorizable> { | ||||
|   static const bool HasOptimizedImplementation = !Op::IsStateful; | ||||
|   static const int PacketSize = | ||||
|       unpacket_traits<typename Self::PacketReturnType>::size; | ||||
|  | ||||
|   // launch one reducer per thread and accumulate the result. | ||||
|   static void run(const Self& self, Op& reducer, const ThreadPoolDevice& device, | ||||
|                   typename Self::CoeffReturnType* output) { | ||||
|     typedef typename Self::Index Index; | ||||
|     const Index num_coeffs = array_prod(self.m_impl.dimensions()); | ||||
|     if (num_coeffs == 0) { | ||||
|       *output = reducer.finalize(reducer.initialize()); | ||||
|       return; | ||||
|     } | ||||
|     const TensorOpCost cost = | ||||
|         self.m_impl.costPerCoeff(Vectorizable) + | ||||
|         TensorOpCost(0, 0, internal::functor_traits<Op>::Cost, Vectorizable, | ||||
|                      PacketSize); | ||||
|     const int num_threads = TensorCostModel<ThreadPoolDevice>::numThreads( | ||||
|         num_coeffs, cost, device.numThreads()); | ||||
|     if (num_threads == 1) { | ||||
|       *output = | ||||
|           InnerMostDimReducer<Self, Op, Vectorizable>::reduce(self, 0, num_coeffs, reducer); | ||||
|       return; | ||||
|     } | ||||
|     const Index blocksize = | ||||
|         std::floor<Index>(static_cast<float>(num_coeffs) / num_threads); | ||||
|     const Index numblocks = blocksize > 0 ? num_coeffs / blocksize : 0; | ||||
|     eigen_assert(num_coeffs >= numblocks * blocksize); | ||||
|  | ||||
|     Barrier barrier(internal::convert_index<unsigned int>(numblocks)); | ||||
|     MaxSizeVector<typename Self::CoeffReturnType> shards(numblocks, reducer.initialize()); | ||||
|     for (Index i = 0; i < numblocks; ++i) { | ||||
|       device.enqueue_with_barrier(&barrier, &FullReducerShard<Self, Op, Vectorizable>::run, | ||||
|                                   self, i * blocksize, blocksize, reducer, | ||||
|                                   &shards[i]); | ||||
|     } | ||||
|     typename Self::CoeffReturnType finalShard; | ||||
|     if (numblocks * blocksize < num_coeffs) { | ||||
|       finalShard = InnerMostDimReducer<Self, Op, Vectorizable>::reduce( | ||||
|           self, numblocks * blocksize, num_coeffs - numblocks * blocksize, | ||||
|           reducer); | ||||
|     } else { | ||||
|       finalShard = reducer.initialize(); | ||||
|     } | ||||
|     barrier.Wait(); | ||||
|  | ||||
|     for (Index i = 0; i < numblocks; ++i) { | ||||
|       reducer.reduce(shards[i], &finalShard); | ||||
|     } | ||||
|     *output = reducer.finalize(finalShard); | ||||
|   } | ||||
| }; | ||||
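|  | ||||
| // Illustrative sketch (not part of this file): the same shard-and-merge | ||||
| // structure as the thread-pool reducer above, written with std::thread -- each | ||||
| // worker reduces one contiguous block, the caller reduces the leftover tail, | ||||
| // waits for the workers (the role of the Barrier) and merges the shards. | ||||
| #if 0 | ||||
|   #include <cstddef> | ||||
|   #include <numeric> | ||||
|   #include <thread> | ||||
|   #include <vector> | ||||
|   static double threaded_sum(const std::vector<double>& data, int num_threads) { | ||||
|     const std::size_t blocksize = data.size() / num_threads; | ||||
|     std::vector<double> shards(num_threads, 0.0); | ||||
|     std::vector<std::thread> workers; | ||||
|     for (int i = 0; i < num_threads; ++i) | ||||
|       workers.emplace_back([&, i] { | ||||
|         const std::size_t begin = i * blocksize; | ||||
|         shards[i] = std::accumulate(data.begin() + begin, data.begin() + begin + blocksize, 0.0); | ||||
|       }); | ||||
|     // The calling thread reduces whatever did not fit into a full block. | ||||
|     double result = std::accumulate(data.begin() + num_threads * blocksize, data.end(), 0.0); | ||||
|     for (std::thread& t : workers) t.join(); | ||||
|     return std::accumulate(shards.begin(), shards.end(), result); | ||||
|   } | ||||
| #endif | ||||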
|  | ||||
| #endif | ||||
|  | ||||
|  | ||||
| // Default inner reducer | ||||
| template <typename Self, typename Op, typename Device> | ||||
| struct InnerReducer { | ||||
|   static const bool HasOptimizedImplementation = false; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC static bool run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) { | ||||
|     eigen_assert(false && "Not implemented"); | ||||
|     return true; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| // Default outer reducer | ||||
| template <typename Self, typename Op, typename Device> | ||||
| struct OuterReducer { | ||||
|   static const bool HasOptimizedImplementation = false; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC static bool run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) { | ||||
|     eigen_assert(false && "Not implemented"); | ||||
|     return true; | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) | ||||
| template <int B, int N, typename S, typename R, typename I> | ||||
| __global__ void FullReductionKernel(R, const S, I, typename S::CoeffReturnType*, unsigned int*); | ||||
|  | ||||
|  | ||||
| #ifdef EIGEN_HAS_CUDA_FP16 | ||||
| template <typename S, typename R, typename I> | ||||
| __global__ void ReductionInitFullReduxKernelHalfFloat(R, const S, I, half2*); | ||||
| template <int B, int N, typename S, typename R, typename I> | ||||
| __global__ void FullReductionKernelHalfFloat(R, const S, I, half*, half2*); | ||||
| template <int NPT, typename S, typename R, typename I> | ||||
| __global__ void InnerReductionKernelHalfFloat(R, const S, I, I, half*); | ||||
|  | ||||
| #endif | ||||
|  | ||||
| template <int NPT, typename S, typename R, typename I> | ||||
| __global__ void InnerReductionKernel(R, const S, I, I, typename S::CoeffReturnType*); | ||||
|  | ||||
| template <int NPT, typename S, typename R, typename I> | ||||
| __global__ void OuterReductionKernel(R, const S, I, I, typename S::CoeffReturnType*); | ||||
| #endif | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
|  | ||||
| template <typename Op, typename Dims, typename XprType,  template <class> class MakePointer_> | ||||
| class TensorReductionOp : public TensorBase<TensorReductionOp<Op, Dims, XprType, MakePointer_>, ReadOnlyAccessors> { | ||||
|   public: | ||||
|     typedef typename Eigen::internal::traits<TensorReductionOp>::Scalar Scalar; | ||||
|     typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; | ||||
|     typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; | ||||
|     typedef typename Eigen::internal::nested<TensorReductionOp>::type Nested; | ||||
|     typedef typename Eigen::internal::traits<TensorReductionOp>::StorageKind StorageKind; | ||||
|     typedef typename Eigen::internal::traits<TensorReductionOp>::Index Index; | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|     TensorReductionOp(const XprType& expr, const Dims& dims) : m_expr(expr), m_dims(dims) | ||||
|     { } | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|     TensorReductionOp(const XprType& expr, const Dims& dims, const Op& reducer) : m_expr(expr), m_dims(dims), m_reducer(reducer) | ||||
|     { } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|     const XprType& expression() const { return m_expr; } | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|     const Dims& dims() const { return m_dims; } | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|     const Op& reducer() const { return m_reducer; } | ||||
|  | ||||
|   protected: | ||||
|     typename XprType::Nested m_expr; | ||||
|     const Dims m_dims; | ||||
|     const Op m_reducer; | ||||
| }; | ||||
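|  | ||||
| // Illustrative usage sketch (not part of this file): reduction nodes are | ||||
| // normally created through the TensorBase helpers (sum, maximum, mean, | ||||
| // reduce, ...), which pick the reducer functor and forward the reduced | ||||
| // dimensions into the TensorReductionOp defined above. | ||||
| #if 0 | ||||
|   #include <unsupported/Eigen/CXX11/Tensor> | ||||
|   int main() { | ||||
|     Eigen::Tensor<float, 3> t(2, 3, 4); | ||||
|     t.setRandom(); | ||||
|     Eigen::array<int, 1> dims = {{1}};         // reduce along dimension 1 | ||||
|     Eigen::Tensor<float, 2> s = t.sum(dims);   // 2x4 result | ||||
|     Eigen::Tensor<float, 0> total = t.sum();   // full reduction to a rank-0 tensor | ||||
|   } | ||||
| #endif | ||||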
|  | ||||
|  | ||||
| // Eval as rvalue | ||||
| template<typename Op, typename Dims, typename ArgType, template <class> class MakePointer_, typename Device> | ||||
| struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device> | ||||
| { | ||||
|   typedef TensorReductionOp<Op, Dims, ArgType, MakePointer_> XprType; | ||||
|   typedef typename XprType::Index Index; | ||||
|   typedef ArgType ChildType; | ||||
|   typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions; | ||||
|   static const int NumInputDims = internal::array_size<InputDimensions>::value; | ||||
|   static const int NumReducedDims = internal::array_size<Dims>::value; | ||||
|   static const int NumOutputDims = NumInputDims - NumReducedDims; | ||||
|   typedef typename internal::conditional<NumOutputDims==0, Sizes<>, DSizes<Index, NumOutputDims> >::type Dimensions; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device> Self; | ||||
|   static const bool InputPacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess; | ||||
|   typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = false, | ||||
|     PacketAccess = Self::InputPacketAccess && Op::PacketAccess, | ||||
|     Layout = TensorEvaluator<ArgType, Device>::Layout, | ||||
|     CoordAccess = false,  // to be implemented | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   static const bool ReducingInnerMostDims = internal::are_inner_most_dims<Dims, NumInputDims, Layout>::value; | ||||
|   static const bool PreservingInnerMostDims = internal::preserve_inner_most_dims<Dims, NumInputDims, Layout>::value; | ||||
|   static const bool RunningFullReduction = (NumOutputDims==0); | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) | ||||
|       : m_impl(op.expression(), device), m_reducer(op.reducer()), m_result(NULL), m_device(device), m_xpr_dims(op.dims()) | ||||
|   { | ||||
|     EIGEN_STATIC_ASSERT((NumInputDims >= NumReducedDims), YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|     EIGEN_STATIC_ASSERT((!ReducingInnerMostDims | !PreservingInnerMostDims | (NumReducedDims == NumInputDims)), | ||||
|                         YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|  | ||||
|     // Build the bitmap indicating if an input dimension is reduced or not. | ||||
|     for (int i = 0; i < NumInputDims; ++i) { | ||||
|       m_reduced[i] = false; | ||||
|     } | ||||
|     for (int i = 0; i < NumReducedDims; ++i) { | ||||
|       eigen_assert(op.dims()[i] >= 0); | ||||
|       eigen_assert(op.dims()[i] < NumInputDims); | ||||
|       m_reduced[op.dims()[i]] = true; | ||||
|     } | ||||
|  | ||||
|     const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); | ||||
|     internal::DimInitializer<Dimensions>::run(input_dims, m_reduced, &m_dimensions, &m_reducedDims); | ||||
|  | ||||
|     // Precompute output strides. | ||||
|     if (NumOutputDims > 0) { | ||||
|       if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|         m_outputStrides[0] = 1; | ||||
|         for (int i = 1; i < NumOutputDims; ++i) { | ||||
|           m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1]; | ||||
|         } | ||||
|       } else { | ||||
|         m_outputStrides.back() = 1; | ||||
|         for (int i = NumOutputDims - 2; i >= 0; --i) { | ||||
|           m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1]; | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     // Precompute input strides. | ||||
|     if (NumInputDims > 0) { | ||||
|       array<Index, NumInputDims> input_strides; | ||||
|       if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|         input_strides[0] = 1; | ||||
|         for (int i = 1; i < NumInputDims; ++i) { | ||||
|           input_strides[i] = input_strides[i-1] * input_dims[i-1]; | ||||
|         } | ||||
|       } else { | ||||
|         input_strides.back() = 1; | ||||
|         for (int i = NumInputDims - 2; i >= 0; --i) { | ||||
|           input_strides[i] = input_strides[i + 1] * input_dims[i + 1]; | ||||
|         } | ||||
|       } | ||||
|  | ||||
|       int outputIndex = 0; | ||||
|       int reduceIndex = 0; | ||||
|       for (int i = 0; i < NumInputDims; ++i) { | ||||
|         if (m_reduced[i]) { | ||||
|           m_reducedStrides[reduceIndex] = input_strides[i]; | ||||
|           ++reduceIndex; | ||||
|         } else { | ||||
|           m_preservedStrides[outputIndex] = input_strides[i]; | ||||
|           ++outputIndex; | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     // Special case for full reductions | ||||
|     if (NumOutputDims == 0) { | ||||
|       m_preservedStrides[0] = internal::array_prod(input_dims); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } | ||||
|  | ||||
|   EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool evalSubExprsIfNeeded(typename MakePointer_<CoeffReturnType>::Type data) { | ||||
|     m_impl.evalSubExprsIfNeeded(NULL); | ||||
|  | ||||
|     // Use the FullReducer if possible. | ||||
|     if ((RunningFullReduction && RunningOnSycl) || (RunningFullReduction && | ||||
|         internal::FullReducer<Self, Op, Device>::HasOptimizedImplementation && | ||||
|         ((RunningOnGPU && (m_device.majorDeviceVersion() >= 3)) || | ||||
|          !RunningOnGPU))) { | ||||
|       bool need_assign = false; | ||||
|       if (!data) { | ||||
|         m_result = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType))); | ||||
|         data = m_result; | ||||
|         need_assign = true; | ||||
|       } | ||||
|       Op reducer(m_reducer); | ||||
|       internal::FullReducer<Self, Op, Device>::run(*this, reducer, m_device, data); | ||||
|       return need_assign; | ||||
|     } | ||||
|     else if(RunningOnSycl){ | ||||
|       const Index num_values_to_reduce = internal::array_prod(m_reducedDims); | ||||
|       const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions); | ||||
|       if (!data) { | ||||
|         data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); | ||||
|         m_result = data; | ||||
|       } | ||||
|       Op reducer(m_reducer); | ||||
|       internal::InnerReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve); | ||||
|       return (m_result != NULL); | ||||
|     } | ||||
|  | ||||
|     // Attempt to use an optimized reduction. | ||||
|     else if (RunningOnGPU && (m_device.majorDeviceVersion() >= 3)) { | ||||
|       bool reducing_inner_dims = true; | ||||
|       for (int i = 0; i < NumReducedDims; ++i) { | ||||
|         if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|           reducing_inner_dims &= m_reduced[i]; | ||||
|         } else { | ||||
|           reducing_inner_dims &= m_reduced[NumInputDims - 1 - i]; | ||||
|         } | ||||
|       } | ||||
|       if (internal::InnerReducer<Self, Op, Device>::HasOptimizedImplementation && | ||||
|           (reducing_inner_dims || ReducingInnerMostDims)) { | ||||
|         const Index num_values_to_reduce = internal::array_prod(m_reducedDims); | ||||
|         const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions); | ||||
|         if (!data) { | ||||
|           if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 128) { | ||||
|             data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); | ||||
|             m_result = data; | ||||
|           } | ||||
|           else { | ||||
|             return true; | ||||
|           } | ||||
|         } | ||||
|         Op reducer(m_reducer); | ||||
|         if (internal::InnerReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) { | ||||
|           if (m_result) { | ||||
|             m_device.deallocate(m_result); | ||||
|             m_result = NULL; | ||||
|           } | ||||
|           return true; | ||||
|         } else { | ||||
|           return (m_result != NULL); | ||||
|         } | ||||
|       } | ||||
|  | ||||
|       bool preserving_inner_dims = true; | ||||
|       for (int i = 0; i < NumReducedDims; ++i) { | ||||
|         if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|           preserving_inner_dims &= m_reduced[NumInputDims - 1 - i]; | ||||
|         } else { | ||||
|           preserving_inner_dims &= m_reduced[i]; | ||||
|         } | ||||
|       } | ||||
|       if (internal::OuterReducer<Self, Op, Device>::HasOptimizedImplementation && | ||||
|           preserving_inner_dims) { | ||||
|         const Index num_values_to_reduce = internal::array_prod(m_reducedDims); | ||||
|         const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions); | ||||
|         if (!data) { | ||||
|           if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 32) { | ||||
|             data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); | ||||
|             m_result = data; | ||||
|           } | ||||
|           else { | ||||
|             return true; | ||||
|           } | ||||
|         } | ||||
|         Op reducer(m_reducer); | ||||
|         if (internal::OuterReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) { | ||||
|           if (m_result) { | ||||
|             m_device.deallocate(m_result); | ||||
|             m_result = NULL; | ||||
|           } | ||||
|           return true; | ||||
|         } else { | ||||
|           return (m_result != NULL); | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|     return true; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { | ||||
|     m_impl.cleanup(); | ||||
|     if (m_result) { | ||||
|       m_device.deallocate(m_result); | ||||
|       m_result = NULL; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const | ||||
|   { | ||||
|     if ((RunningOnSycl || RunningFullReduction || RunningOnGPU) && m_result) { | ||||
|       return *(m_result + index); | ||||
|     } | ||||
|     Op reducer(m_reducer); | ||||
|     if (ReducingInnerMostDims || RunningFullReduction) { | ||||
|       const Index num_values_to_reduce = | ||||
|         (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1]; | ||||
|       return internal::InnerMostDimReducer<Self, Op>::reduce(*this, firstInput(index), | ||||
|                                                              num_values_to_reduce, reducer); | ||||
|     } else { | ||||
|       typename Self::CoeffReturnType accum = reducer.initialize(); | ||||
|       internal::GenericDimReducer<NumReducedDims-1, Self, Op>::reduce(*this, firstInput(index), reducer, &accum); | ||||
|       return reducer.finalize(accum); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   // TODO(bsteiner): provide a more efficient implementation. | ||||
|   template<int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const | ||||
|   { | ||||
|     EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     eigen_assert(index + PacketSize - 1 < Index(internal::array_prod(dimensions()))); | ||||
|  | ||||
|     if (RunningOnGPU && m_result) { | ||||
|       return internal::pload<PacketReturnType>(m_result + index); | ||||
|     } | ||||
|  | ||||
|     EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; | ||||
|     if (ReducingInnerMostDims) { | ||||
|       const Index num_values_to_reduce = | ||||
|         (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1]; | ||||
|       const Index firstIndex = firstInput(index); | ||||
|       for (Index i = 0; i < PacketSize; ++i) { | ||||
|         Op reducer(m_reducer); | ||||
|         values[i] = internal::InnerMostDimReducer<Self, Op>::reduce(*this, firstIndex + i * num_values_to_reduce, | ||||
|                                                                     num_values_to_reduce, reducer); | ||||
|       } | ||||
|     } else if (PreservingInnerMostDims) { | ||||
|       const Index firstIndex = firstInput(index); | ||||
|       const int innermost_dim = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? 0 : NumOutputDims - 1; | ||||
|       // TBD: extend this to the n innermost dimensions that we preserve. | ||||
|       if (((firstIndex % m_dimensions[innermost_dim]) + PacketSize - 1) < m_dimensions[innermost_dim]) { | ||||
|         Op reducer(m_reducer); | ||||
|         typename Self::PacketReturnType accum = reducer.template initializePacket<typename Self::PacketReturnType>(); | ||||
|         internal::InnerMostDimPreserver<NumReducedDims-1, Self, Op>::reduce(*this, firstIndex, reducer, &accum); | ||||
|         return reducer.finalizePacket(accum); | ||||
|       } else { | ||||
|         for (int i = 0; i < PacketSize; ++i) { | ||||
|           values[i] = coeff(index + i); | ||||
|         } | ||||
|       } | ||||
|     } else { | ||||
|       for (int i = 0; i < PacketSize; ++i) { | ||||
|         values[i] = coeff(index + i); | ||||
|       } | ||||
|     } | ||||
|     PacketReturnType rslt = internal::pload<PacketReturnType>(values); | ||||
|     return rslt; | ||||
|   } | ||||
|  | ||||
|   // Must be called after evalSubExprsIfNeeded(). | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { | ||||
|     if (RunningFullReduction && m_result) { | ||||
|       return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); | ||||
|     } else { | ||||
|       const Index num_values_to_reduce = internal::array_prod(m_reducedDims); | ||||
|       const double compute_cost = num_values_to_reduce * internal::functor_traits<Op>::Cost; | ||||
|       return m_impl.costPerCoeff(vectorized) * num_values_to_reduce + | ||||
|           TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC typename MakePointer_<Scalar>::Type data() const { return m_result; } | ||||
|   /// required by sycl in order to extract the accessor | ||||
|   const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; } | ||||
|   /// added for sycl in order to construct the buffer from the sycl device | ||||
|   const Device& device() const{return m_device;} | ||||
|   /// added for sycl in order to re-construct the reduction eval on the device for the sub-kernel | ||||
|   const Dims& xprDims() const {return m_xpr_dims;} | ||||
|  | ||||
|  | ||||
|   private: | ||||
|   template <int, typename, typename> friend struct internal::GenericDimReducer; | ||||
|   template <typename, typename, bool> friend struct internal::InnerMostDimReducer; | ||||
|   template <int, typename, typename, bool> friend struct internal::InnerMostDimPreserver; | ||||
|   template <typename S, typename O, typename D, bool V> friend struct internal::FullReducer; | ||||
| #ifdef EIGEN_USE_THREADS | ||||
|   template <typename S, typename O, bool V> friend struct internal::FullReducerShard; | ||||
| #endif | ||||
| #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) | ||||
|   template <int B, int N, typename S, typename R, typename I> friend void internal::FullReductionKernel(R, const S, I, typename S::CoeffReturnType*, unsigned int*); | ||||
| #ifdef EIGEN_HAS_CUDA_FP16 | ||||
|   template <typename S, typename R, typename I> friend void internal::ReductionInitFullReduxKernelHalfFloat(R, const S, I, half2*); | ||||
|   template <int B, int N, typename S, typename R, typename I> friend void internal::FullReductionKernelHalfFloat(R, const S, I, half*, half2*); | ||||
|   template <int NPT, typename S, typename R, typename I> friend void internal::InnerReductionKernelHalfFloat(R, const S, I, I, half*); | ||||
| #endif | ||||
|   template <int NPT, typename S, typename R, typename I> friend void internal::InnerReductionKernel(R, const S, I, I, typename S::CoeffReturnType*); | ||||
|  | ||||
|   template <int NPT, typename S, typename R, typename I> friend void internal::OuterReductionKernel(R, const S, I, I, typename S::CoeffReturnType*); | ||||
| #endif | ||||
|  | ||||
|   template <typename S, typename O, typename D> friend struct internal::InnerReducer; | ||||
|  | ||||
|   // Returns the Index in the input tensor of the first value that needs to be | ||||
|   // used to compute the reduction at output index "index". | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index firstInput(Index index) const { | ||||
|     if (ReducingInnerMostDims) { | ||||
|       if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|         return index * m_preservedStrides[0]; | ||||
|       } else { | ||||
|         return index * m_preservedStrides[NumPreservedStrides - 1]; | ||||
|       } | ||||
|     } | ||||
|     // TBD: optimize the case where we preserve the innermost dimensions. | ||||
|     Index startInput = 0; | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       for (int i = NumOutputDims - 1; i > 0; --i) { | ||||
|         // This is index_i in the output tensor. | ||||
|         const Index idx = index / m_outputStrides[i]; | ||||
|         startInput += idx * m_preservedStrides[i]; | ||||
|         index -= idx * m_outputStrides[i]; | ||||
|       } | ||||
|       if (PreservingInnerMostDims) { | ||||
|         eigen_assert(m_preservedStrides[0] == 1); | ||||
|         startInput += index; | ||||
|       } else { | ||||
|         startInput += index * m_preservedStrides[0]; | ||||
|       } | ||||
|     } else { | ||||
|       for (int i = 0; i < NumOutputDims - 1; ++i) { | ||||
|         // This is index_i in the output tensor. | ||||
|         const Index idx = index / m_outputStrides[i]; | ||||
|         startInput += idx * m_preservedStrides[i]; | ||||
|         index -= idx * m_outputStrides[i]; | ||||
|       } | ||||
|       if (PreservingInnerMostDims) { | ||||
|         eigen_assert(m_preservedStrides[NumPreservedStrides - 1] == 1); | ||||
|         startInput += index; | ||||
|       } else { | ||||
|         startInput += index * m_preservedStrides[NumPreservedStrides - 1]; | ||||
|       } | ||||
|     } | ||||
|     return startInput; | ||||
|   } | ||||
|  | ||||
|   // Bitmap indicating if an input dimension is reduced or not. | ||||
|   array<bool, NumInputDims> m_reduced; | ||||
|   // Dimensions of the output of the operation. | ||||
|   Dimensions m_dimensions; | ||||
|   // Precomputed strides for the output tensor. | ||||
|   array<Index, NumOutputDims> m_outputStrides; | ||||
|   // Subset of strides of the input tensor for the non-reduced dimensions. | ||||
|   // Indexed by output dimensions. | ||||
|   static const int NumPreservedStrides = max_n_1<NumOutputDims>::size; | ||||
|   array<Index, NumPreservedStrides> m_preservedStrides; | ||||
|  | ||||
|   // Subset of strides of the input tensor for the reduced dimensions. | ||||
|   // Indexed by reduced dimensions. | ||||
|   array<Index, NumReducedDims> m_reducedStrides; | ||||
|   // Size of the input dimensions that are reduced. | ||||
|   // Indexed by reduced dimensions. | ||||
|   array<Index, NumReducedDims> m_reducedDims; | ||||
|  | ||||
|   // Evaluator for the input expression. | ||||
|   TensorEvaluator<ArgType, Device> m_impl; | ||||
|  | ||||
|   // Operation to apply for computing the reduction. | ||||
|   Op m_reducer; | ||||
|  | ||||
|   // For full reductions | ||||
| #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) | ||||
|   static const bool RunningOnGPU = internal::is_same<Device, Eigen::GpuDevice>::value; | ||||
|   static const bool RunningOnSycl = false; | ||||
| #elif defined(EIGEN_USE_SYCL) | ||||
|   static const bool RunningOnSycl = internal::is_same<typename internal::remove_all<Device>::type, Eigen::SyclDevice>::value; | ||||
|   static const bool RunningOnGPU = false; | ||||
| #else | ||||
|   static const bool RunningOnGPU = false; | ||||
|   static const bool RunningOnSycl = false; | ||||
| #endif | ||||
|   typename MakePointer_<CoeffReturnType>::Type m_result; | ||||
|  | ||||
|   const Device& m_device; | ||||
|   const Dims& m_xpr_dims; | ||||
| }; | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H | ||||
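The firstInput() member above decomposes a linear output index into per-dimension coordinates using the output strides, then re-accumulates those coordinates with the preserved input strides to find the first contributing input coefficient. The following standalone sketch (plain C++, independent of Eigen; the 4x5x6 shape and the stride values are made-up examples) walks through that decomposition for the column-major case.

// Standalone illustration of the output-index -> first-input-index mapping used by
// firstInput() above (column-major case, innermost dimension not preserved).
// The shapes and strides below are hypothetical example values, not taken from Eigen.
#include <array>
#include <cstdio>

int main() {
  // Imagine a 4x5x6 input reduced over the middle dimension: the output is 4x6.
  // Output strides (column-major): {1, 4}; preserved input strides: {1, 20}.
  const std::array<long, 2> output_strides    = {1, 4};
  const std::array<long, 2> preserved_strides = {1, 20};

  long index = 13;           // linear index into the 4x6 output
  long start_input = 0;
  for (int i = 1; i > 0; --i) {                   // peel dimensions from outermost to innermost
    const long idx = index / output_strides[i];   // coordinate along output dimension i
    start_input += idx * preserved_strides[i];    // same coordinate, but with the input stride
    index -= idx * output_strides[i];
  }
  start_input += index * preserved_strides[0];    // innermost preserved dimension

  // Output index 13 = coordinates (1, 3) -> first input coefficient 1*1 + 3*20 = 61.
  std::printf("first input index: %ld\n", start_input);
  return 0;
}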
							
								
								
									
external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h (750 lines, vendored, Normal file)
							| @@ -0,0 +1,750 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_CUDA_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_CUDA_H | ||||
|  | ||||
| namespace Eigen { | ||||
| namespace internal { | ||||
|  | ||||
|  | ||||
| #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) | ||||
| // Full reducers for GPU, don't vectorize for now | ||||
|  | ||||
| // Reducer function that enables multiple CUDA threads to safely accumulate at the same | ||||
| // output address. It basically reads the current value of the output variable, and | ||||
| // attempts to update it with the new value. If in the meantime another CUDA thread | ||||
| // has updated the content of the output address, it will try again. | ||||
| template <typename T, typename R> | ||||
| __device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer) { | ||||
| #if __CUDA_ARCH__ >= 300 | ||||
|   if (sizeof(T) == 4) | ||||
|   { | ||||
|     unsigned int oldval = *reinterpret_cast<unsigned int*>(output); | ||||
|     unsigned int newval = oldval; | ||||
|     reducer.reduce(accum, reinterpret_cast<T*>(&newval)); | ||||
|     if (newval == oldval) { | ||||
|       return; | ||||
|     } | ||||
|     unsigned int readback; | ||||
|     while ((readback = atomicCAS((unsigned int*)output, oldval, newval)) != oldval) { | ||||
|       oldval = readback; | ||||
|       newval = oldval; | ||||
|       reducer.reduce(accum, reinterpret_cast<T*>(&newval)); | ||||
|       if (newval == oldval) { | ||||
|         return; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   else if (sizeof(T) == 8) { | ||||
|     unsigned long long oldval = *reinterpret_cast<unsigned long long*>(output); | ||||
|     unsigned long long newval = oldval; | ||||
|     reducer.reduce(accum, reinterpret_cast<T*>(&newval)); | ||||
|     if (newval == oldval) { | ||||
|       return; | ||||
|     } | ||||
|     unsigned long long readback; | ||||
|     while ((readback = atomicCAS((unsigned long long*)output, oldval, newval)) != oldval) { | ||||
|       oldval = readback; | ||||
|       newval = oldval; | ||||
|       reducer.reduce(accum, reinterpret_cast<T*>(&newval)); | ||||
|       if (newval == oldval) { | ||||
|         return; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   else { | ||||
|     assert(0 && "Wordsize not supported"); | ||||
|   } | ||||
| #else | ||||
|   assert(0 && "Shouldn't be called on unsupported device"); | ||||
| #endif | ||||
| } | ||||
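The compare-and-swap loop above is the generic way to emulate an arbitrary atomic read-modify-write: read the current value, apply the reduction to a private copy, and publish it with a CAS, retrying whenever another thread got there first. Here is a minimal host-side sketch of the same pattern with std::atomic; the max operation and the atomic_max name are illustrative stand-ins for the reducer functor, not Eigen API.

// Host-side sketch of the compare-and-swap retry loop used by atomicReduce above,
// written with std::atomic instead of CUDA atomicCAS. The "reduce" operation here is
// a simple max; the real code delegates to the reducer functor instead.
#include <algorithm>
#include <atomic>
#include <cstdio>

void atomic_max(std::atomic<float>* output, float value) {
  float oldval = output->load();
  while (true) {
    float newval = std::max(oldval, value);   // apply the reduction to a local copy
    if (newval == oldval) return;             // nothing to publish, another thread already did it
    // Try to publish; on failure oldval is reloaded with the current contents and we retry.
    if (output->compare_exchange_weak(oldval, newval)) return;
  }
}

int main() {
  std::atomic<float> acc(0.0f);
  atomic_max(&acc, 3.5f);
  atomic_max(&acc, 1.0f);
  std::printf("%f\n", acc.load());   // prints 3.5
  return 0;
}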
|  | ||||
| // We extend atomicExch to support extra data types | ||||
| template <typename Type> | ||||
| __device__ inline Type atomicExchCustom(Type* address, Type val) { | ||||
|   return atomicExch(address, val); | ||||
| } | ||||
|  | ||||
| template <> | ||||
| __device__ inline double atomicExchCustom(double* address, double val) { | ||||
|   unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(address); | ||||
|   return __longlong_as_double(atomicExch(address_as_ull, __double_as_longlong(val))); | ||||
| } | ||||
|  | ||||
| #ifdef EIGEN_HAS_CUDA_FP16 | ||||
| template <template <typename T> class R> | ||||
| __device__ inline void atomicReduce(half2* output, half2 accum, R<half>& reducer) { | ||||
|   unsigned int oldval = *reinterpret_cast<unsigned int*>(output); | ||||
|   unsigned int newval = oldval; | ||||
|   reducer.reducePacket(accum, reinterpret_cast<half2*>(&newval)); | ||||
|   if (newval == oldval) { | ||||
|     return; | ||||
|   } | ||||
|   unsigned int readback; | ||||
|   while ((readback = atomicCAS((unsigned int*)output, oldval, newval)) != oldval) { | ||||
|     oldval = readback; | ||||
|     newval = oldval; | ||||
|     reducer.reducePacket(accum, reinterpret_cast<half2*>(&newval)); | ||||
|     if (newval == oldval) { | ||||
|       return; | ||||
|     } | ||||
|   } | ||||
| } | ||||
| #endif | ||||
|  | ||||
| template <> | ||||
| __device__ inline void atomicReduce(float* output, float accum, SumReducer<float>&) { | ||||
| #if __CUDA_ARCH__ >= 300 | ||||
|   atomicAdd(output, accum); | ||||
| #else | ||||
|   assert(0 && "Shouldn't be called on unsupported device"); | ||||
| #endif | ||||
| } | ||||
|  | ||||
|  | ||||
| template <typename CoeffType, typename Index> | ||||
| __global__ void ReductionInitKernel(const CoeffType val, Index num_preserved_coeffs, CoeffType* output) { | ||||
|   const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x; | ||||
|   const Index num_threads = blockDim.x * gridDim.x; | ||||
|   for (Index i = thread_id; i < num_preserved_coeffs; i += num_threads) { | ||||
|     output[i] = val; | ||||
|   } | ||||
| } | ||||
|  | ||||
|  | ||||
| template <int BlockSize, int NumPerThread, typename Self, | ||||
|           typename Reducer, typename Index> | ||||
| __global__ void FullReductionKernel(Reducer reducer, const Self input, Index num_coeffs, | ||||
|                                     typename Self::CoeffReturnType* output, unsigned int* semaphore) { | ||||
| #if __CUDA_ARCH__ >= 300 | ||||
|   // Initialize the output value | ||||
|   const Index first_index = blockIdx.x * BlockSize * NumPerThread + threadIdx.x; | ||||
|   if (gridDim.x == 1) { | ||||
|     if (first_index == 0) { | ||||
|       *output = reducer.initialize(); | ||||
|     } | ||||
|   } | ||||
|   else { | ||||
|     if (threadIdx.x == 0) { | ||||
|       unsigned int block = atomicCAS(semaphore, 0u, 1u); | ||||
|       if (block == 0) { | ||||
|         // We're the first block to run, initialize the output value | ||||
|         atomicExchCustom(output, reducer.initialize()); | ||||
|         __threadfence(); | ||||
|         atomicExch(semaphore, 2u); | ||||
|       } | ||||
|       else { | ||||
|         // Wait for the first block to initialize the output value. | ||||
|         // Use atomicCAS here to ensure that the reads aren't cached | ||||
|         unsigned int val; | ||||
|         do { | ||||
|           val = atomicCAS(semaphore, 2u, 2u); | ||||
|         } | ||||
|         while (val < 2u); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   __syncthreads(); | ||||
|  | ||||
|   eigen_assert(gridDim.x == 1 || *semaphore >= 2u); | ||||
|  | ||||
|   typename Self::CoeffReturnType accum = reducer.initialize(); | ||||
|   Index max_iter = numext::mini<Index>(num_coeffs - first_index, NumPerThread*BlockSize); | ||||
|   for (Index i = 0; i < max_iter; i+=BlockSize) { | ||||
|     const Index index = first_index + i; | ||||
|     eigen_assert(index < num_coeffs); | ||||
|     typename Self::CoeffReturnType val = input.m_impl.coeff(index); | ||||
|     reducer.reduce(val, &accum); | ||||
|   } | ||||
|  | ||||
| #pragma unroll | ||||
|   for (int offset = warpSize/2; offset > 0; offset /= 2) { | ||||
|     reducer.reduce(__shfl_down(accum, offset, warpSize), &accum); | ||||
|   } | ||||
|  | ||||
|   if ((threadIdx.x & (warpSize - 1)) == 0) { | ||||
|     atomicReduce(output, accum, reducer); | ||||
|   } | ||||
|  | ||||
|   if (gridDim.x > 1 && threadIdx.x == 0) { | ||||
|     // Let the last block reset the semaphore | ||||
|     atomicInc(semaphore, gridDim.x + 1); | ||||
|   } | ||||
| #else | ||||
|   assert(0 && "Shouldn't be called on unsupported device"); | ||||
| #endif | ||||
| } | ||||
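The #pragma unroll loop over __shfl_down above is a warp-level tree reduction: at every step each lane folds in the accumulator held `offset` lanes above it and the offset is halved, so after log2(warpSize) steps lane 0 holds the value for the whole warp. The sketch below simulates that halving on an array of 32 "lane" values; sum is assumed as the operation purely for illustration.

// Host simulation of the warp-level __shfl_down reduction above: each step every
// "lane" combines its value with the one `offset` lanes higher, and the offset is
// halved until lane 0 holds the reduction of the whole warp.
#include <cstdio>
#include <vector>

int main() {
  const int warp_size = 32;
  std::vector<float> lane(warp_size, 1.0f);   // every lane contributes 1

  for (int offset = warp_size / 2; offset > 0; offset /= 2) {
    // __shfl_down(accum, offset) returns the accumulator of lane (id + offset);
    // simulate it by snapshotting the lanes before overwriting them.
    std::vector<float> shifted(lane);
    for (int i = 0; i + offset < warp_size; ++i) {
      lane[i] += shifted[i + offset];
    }
  }
  std::printf("lane 0 holds %f (expected 32)\n", lane[0]);
  return 0;
}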
|  | ||||
|  | ||||
| #ifdef EIGEN_HAS_CUDA_FP16 | ||||
| template <typename Self, | ||||
|           typename Reducer, typename Index> | ||||
| __global__ void ReductionInitFullReduxKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs, half2* scratch) { | ||||
|   eigen_assert(blockDim.x == 1); | ||||
|   eigen_assert(gridDim.x == 1); | ||||
|   if (num_coeffs % 2 != 0) { | ||||
|     half last = input.m_impl.coeff(num_coeffs-1); | ||||
|     *scratch = __halves2half2(last, reducer.initialize()); | ||||
|   } else { | ||||
|     *scratch = reducer.template initializePacket<half2>(); | ||||
|   } | ||||
| } | ||||
|  | ||||
| template <typename Self, | ||||
|           typename Reducer, typename Index> | ||||
| __global__ void ReductionInitKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs, half* output) { | ||||
|   const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x; | ||||
|   const Index num_threads = blockDim.x * gridDim.x; | ||||
|   const Index num_packets = num_coeffs / 2; | ||||
|   for (Index i = thread_id; i < num_packets; i += num_threads) { | ||||
|     ((half2*)output)[i] = reducer.template initializePacket<half2>(); | ||||
|   } | ||||
|  | ||||
|   if (thread_id == 0 && num_coeffs % 2 != 0) { | ||||
|     output[num_coeffs-1] = reducer.initialize(); | ||||
|   } | ||||
| } | ||||
|  | ||||
| template <int BlockSize, int NumPerThread, typename Self, | ||||
|           typename Reducer, typename Index> | ||||
| __global__ void FullReductionKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs, | ||||
|                                     half* output, half2* scratch) { | ||||
|   eigen_assert(NumPerThread % 2 == 0); | ||||
|  | ||||
|   const Index first_index = blockIdx.x * BlockSize * NumPerThread + 2*threadIdx.x; | ||||
|  | ||||
|   // Initialize the output value if it wasn't initialized by the ReductionInitKernel | ||||
|   if (gridDim.x == 1 && first_index == 0) { | ||||
|     if (num_coeffs % 2 != 0) { | ||||
|       half last = input.m_impl.coeff(num_coeffs-1); | ||||
|       *scratch = __halves2half2(last, reducer.initialize()); | ||||
|     } else { | ||||
|       *scratch = reducer.template initializePacket<half2>(); | ||||
|     } | ||||
|     __syncthreads(); | ||||
|   } | ||||
|  | ||||
|   half2 accum = reducer.template initializePacket<half2>(); | ||||
|   const Index max_iter = numext::mini<Index>((num_coeffs - first_index) / 2, NumPerThread*BlockSize / 2); | ||||
|   for (Index i = 0; i < max_iter; i += BlockSize) { | ||||
|     const Index index = first_index + 2*i; | ||||
|     eigen_assert(index + 1 < num_coeffs); | ||||
|     half2 val = input.m_impl.template packet<Unaligned>(index); | ||||
|     reducer.reducePacket(val, &accum); | ||||
|   } | ||||
|  | ||||
| #pragma unroll | ||||
|   for (int offset = warpSize/2; offset > 0; offset /= 2) { | ||||
|     reducer.reducePacket(__shfl_down(accum, offset, warpSize), &accum); | ||||
|   } | ||||
|  | ||||
|   if ((threadIdx.x & (warpSize - 1)) == 0) { | ||||
|     atomicReduce(scratch, accum, reducer); | ||||
|   } | ||||
|  | ||||
|   __syncthreads(); | ||||
|  | ||||
|   if (gridDim.x == 1 && first_index == 0) { | ||||
|     half tmp = __low2half(*scratch); | ||||
|     reducer.reduce(__high2half(*scratch), &tmp); | ||||
|     *output = tmp; | ||||
|   } | ||||
| } | ||||
|  | ||||
| template <typename Op> | ||||
| __global__ void ReductionCleanupKernelHalfFloat(Op& reducer, half* output, half2* scratch) { | ||||
|   eigen_assert(threadIdx.x == 0); | ||||
|   half tmp = __low2half(*scratch); | ||||
|   reducer.reduce(__high2half(*scratch), &tmp); | ||||
|   *output = tmp; | ||||
| } | ||||
|  | ||||
| #endif | ||||
|  | ||||
| template <typename Self, typename Op, typename OutputType, bool PacketAccess, typename Enabled = void> | ||||
| struct FullReductionLauncher { | ||||
|   static void run(const Self&, Op&, const GpuDevice&, OutputType*, typename Self::Index) { | ||||
|     assert(false && "Should only be called on doubles, floats and half floats"); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| // Specialization for float and double | ||||
| template <typename Self, typename Op, typename OutputType, bool PacketAccess> | ||||
| struct FullReductionLauncher< | ||||
|     Self, Op, OutputType, PacketAccess, | ||||
|     typename internal::enable_if< | ||||
|       internal::is_same<float, OutputType>::value || | ||||
|       internal::is_same<double, OutputType>::value, | ||||
|     void>::type> { | ||||
|   static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs) { | ||||
|     typedef typename Self::Index Index; | ||||
|     typedef typename Self::CoeffReturnType Scalar; | ||||
|     const int block_size = 256; | ||||
|     const int num_per_thread = 128; | ||||
|     const int num_blocks = divup<int>(num_coeffs, block_size * num_per_thread); | ||||
|  | ||||
|     unsigned int* semaphore = NULL; | ||||
|     if (num_blocks > 1) { | ||||
|       semaphore = device.semaphore(); | ||||
|     } | ||||
|  | ||||
|     LAUNCH_CUDA_KERNEL((FullReductionKernel<block_size, num_per_thread, Self, Op, Index>), | ||||
|                        num_blocks, block_size, 0, device, reducer, self, num_coeffs, output, semaphore); | ||||
|   } | ||||
| }; | ||||
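The launcher above sizes the grid so that each block of 256 threads covers 256 * 128 coefficients, and it only requests a semaphore when more than one block has to cooperate on the single output value. A small sketch of that divup arithmetic, reusing the same constants (the divup helper here is a local stand-in for Eigen's internal one):

// Standalone sketch of the grid-size computation used by FullReductionLauncher above.
// divup(a, b) rounds the quotient up so that every coefficient is covered by some block.
#include <cstdio>

int divup(int x, int y) { return (x + y - 1) / y; }

int main() {
  const int block_size     = 256;   // threads per block (as in the launcher above)
  const int num_per_thread = 128;   // coefficients accumulated per thread

  for (int num_coeffs : {1000, 32768, 1000000}) {
    const int num_blocks = divup(num_coeffs, block_size * num_per_thread);
    // A semaphore is only needed when several blocks race on the single output value.
    std::printf("%7d coeffs -> %d block(s)%s\n", num_coeffs, num_blocks,
                num_blocks > 1 ? " (semaphore needed)" : "");
  }
  return 0;
}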
|  | ||||
| #ifdef EIGEN_HAS_CUDA_FP16 | ||||
| template <typename Self, typename Op> | ||||
| struct FullReductionLauncher<Self, Op, Eigen::half, false> { | ||||
|   static void run(const Self&, Op&, const GpuDevice&, half*, typename Self::Index) { | ||||
|     assert(false && "Should not be called since there is no packet accessor"); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename Self, typename Op> | ||||
| struct FullReductionLauncher<Self, Op, Eigen::half, true> { | ||||
|   static void run(const Self& self, Op& reducer, const GpuDevice& device, half* output, typename Self::Index num_coeffs) { | ||||
|     typedef typename Self::Index Index; | ||||
|  | ||||
|     const int block_size = 256; | ||||
|     const int num_per_thread = 128; | ||||
|     const int num_blocks = divup<int>(num_coeffs, block_size * num_per_thread); | ||||
|     half2* scratch = static_cast<half2*>(device.scratchpad()); | ||||
|  | ||||
|     if (num_blocks > 1) { | ||||
|       // We initialize the output and the scratchpad outside the reduction kernel when we can't be sure that there | ||||
|       // won't be race conditions between multiple thread blocks. | ||||
|       LAUNCH_CUDA_KERNEL((ReductionInitFullReduxKernelHalfFloat<Self, Op, Index>), | ||||
|                          1, 1, 0, device, reducer, self, num_coeffs, scratch); | ||||
|     } | ||||
|  | ||||
|     LAUNCH_CUDA_KERNEL((FullReductionKernelHalfFloat<block_size, num_per_thread, Self, Op, Index>), | ||||
|                        num_blocks, block_size, 0, device, reducer, self, num_coeffs, output, scratch); | ||||
|  | ||||
|     if (num_blocks > 1) { | ||||
|       LAUNCH_CUDA_KERNEL((ReductionCleanupKernelHalfFloat<Op>), | ||||
|                          1, 1, 0, device, reducer, output, scratch); | ||||
|     } | ||||
|   } | ||||
| }; | ||||
| #endif | ||||
|  | ||||
|  | ||||
| template <typename Self, typename Op, bool Vectorizable> | ||||
| struct FullReducer<Self, Op, GpuDevice, Vectorizable> { | ||||
|   // Unfortunately nvidia doesn't handle exotic types such as complex well, | ||||
|   // so we reduce the scope of the optimized version of the code to the simple cases | ||||
|   // of doubles, floats and half floats | ||||
| #ifdef EIGEN_HAS_CUDA_FP16 | ||||
|   static const bool HasOptimizedImplementation = !Op::IsStateful && | ||||
|       (internal::is_same<typename Self::CoeffReturnType, float>::value || | ||||
|        internal::is_same<typename Self::CoeffReturnType, double>::value || | ||||
|        (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess)); | ||||
| #else | ||||
|   static const bool HasOptimizedImplementation = !Op::IsStateful && | ||||
|                                                 (internal::is_same<typename Self::CoeffReturnType, float>::value || | ||||
|                                                  internal::is_same<typename Self::CoeffReturnType, double>::value); | ||||
| #endif | ||||
|  | ||||
|   template <typename OutputType> | ||||
|   static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output) { | ||||
|     assert(HasOptimizedImplementation && "Should only be called on doubles, floats or half floats"); | ||||
|     const Index num_coeffs = array_prod(self.m_impl.dimensions()); | ||||
|     // Don't crash when we're called with an input tensor of size 0. | ||||
|     if (num_coeffs == 0) { | ||||
|       return; | ||||
|     } | ||||
|  | ||||
|     FullReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| template <int NumPerThread, typename Self, | ||||
|           typename Reducer, typename Index> | ||||
| __global__ void InnerReductionKernel(Reducer reducer, const Self input, Index num_coeffs_to_reduce, Index num_preserved_coeffs, | ||||
|                                          typename Self::CoeffReturnType* output) { | ||||
| #if __CUDA_ARCH__ >= 300 | ||||
|   typedef typename Self::CoeffReturnType Type; | ||||
|   eigen_assert(blockDim.y == 1); | ||||
|   eigen_assert(blockDim.z == 1); | ||||
|   eigen_assert(gridDim.y == 1); | ||||
|   eigen_assert(gridDim.z == 1); | ||||
|  | ||||
|   const int unroll_times = 16; | ||||
|   eigen_assert(NumPerThread % unroll_times == 0); | ||||
|  | ||||
|   const Index input_col_blocks = divup<Index>(num_coeffs_to_reduce, blockDim.x * NumPerThread); | ||||
|   const Index num_input_blocks = input_col_blocks * num_preserved_coeffs; | ||||
|  | ||||
|   const Index num_threads = blockDim.x * gridDim.x; | ||||
|   const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x; | ||||
|  | ||||
|   // Initialize the output values if they weren't initialized by the ReductionInitKernel | ||||
|   if (gridDim.x == 1) { | ||||
|     for (Index i = thread_id; i < num_preserved_coeffs; i += num_threads) { | ||||
|       output[i] = reducer.initialize(); | ||||
|     } | ||||
|     __syncthreads(); | ||||
|   } | ||||
|  | ||||
|   for (Index i = blockIdx.x; i < num_input_blocks; i += gridDim.x) { | ||||
|     const Index row = i / input_col_blocks; | ||||
|  | ||||
|     if (row < num_preserved_coeffs) { | ||||
|       const Index col_block = i % input_col_blocks; | ||||
|       const Index col_begin = col_block * blockDim.x * NumPerThread + threadIdx.x; | ||||
|  | ||||
|       Type reduced_val = reducer.initialize(); | ||||
|  | ||||
|       for (Index j = 0; j < NumPerThread; j += unroll_times) { | ||||
|         const Index last_col = col_begin + blockDim.x * (j + unroll_times - 1); | ||||
|         if (last_col >= num_coeffs_to_reduce) { | ||||
|           for (Index col = col_begin + blockDim.x * j; col < num_coeffs_to_reduce; col += blockDim.x) { | ||||
|             const Type val = input.m_impl.coeff(row * num_coeffs_to_reduce + col); | ||||
|             reducer.reduce(val, &reduced_val); | ||||
|           } | ||||
|           break; | ||||
|         } else { | ||||
|           // Faster version of the loop with no branches after unrolling. | ||||
| #pragma unroll | ||||
|           for (int k = 0; k < unroll_times; ++k) { | ||||
|             const Index col = col_begin + blockDim.x * (j + k); | ||||
|             reducer.reduce(input.m_impl.coeff(row * num_coeffs_to_reduce + col), &reduced_val); | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|  | ||||
| #pragma unroll | ||||
|       for (int offset = warpSize/2; offset > 0; offset /= 2) { | ||||
|         reducer.reduce(__shfl_down(reduced_val, offset), &reduced_val); | ||||
|       } | ||||
|  | ||||
|       if ((threadIdx.x & (warpSize - 1)) == 0) { | ||||
|         atomicReduce(&(output[row]), reduced_val, reducer); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| #else | ||||
|   assert(0 && "Shouldn't be called on unsupported device"); | ||||
| #endif | ||||
| } | ||||
|  | ||||
| #ifdef EIGEN_HAS_CUDA_FP16 | ||||
|  | ||||
| template <int NumPerThread, typename Self, | ||||
|           typename Reducer, typename Index> | ||||
| __global__ void InnerReductionKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs_to_reduce, Index num_preserved_coeffs, | ||||
|                                               half* output) { | ||||
|   eigen_assert(blockDim.y == 1); | ||||
|   eigen_assert(blockDim.z == 1); | ||||
|   eigen_assert(gridDim.y == 1); | ||||
|   eigen_assert(gridDim.z == 1); | ||||
|  | ||||
|   const int unroll_times = 16; | ||||
|   eigen_assert(NumPerThread % unroll_times == 0); | ||||
|   eigen_assert(unroll_times % 2 == 0); | ||||
|  | ||||
|   const Index input_col_blocks = divup<Index>(num_coeffs_to_reduce, blockDim.x * NumPerThread * 2); | ||||
|   const Index num_input_blocks = divup<Index>(input_col_blocks * num_preserved_coeffs, 2); | ||||
|  | ||||
|   const Index num_threads = blockDim.x * gridDim.x; | ||||
|   const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x; | ||||
|  | ||||
|   // Initialize the output values if they weren't initialized by the ReductionInitKernel | ||||
|   if (gridDim.x == 1) { | ||||
|     Index i = 2*thread_id; | ||||
|     for (; i + 1 < num_preserved_coeffs; i += 2*num_threads) { | ||||
|       half* loc = output + i; | ||||
|       *((half2*)loc) = reducer.template initializePacket<half2>(); | ||||
|     } | ||||
|     if (i < num_preserved_coeffs) { | ||||
|       output[i] = reducer.initialize(); | ||||
|     } | ||||
|     __syncthreads(); | ||||
|   } | ||||
|  | ||||
|   for (Index i = blockIdx.x; i < num_input_blocks; i += gridDim.x) { | ||||
|     const Index row = 2 * (i / input_col_blocks); | ||||
|  | ||||
|     if (row + 1 < num_preserved_coeffs) { | ||||
|       const Index col_block = i % input_col_blocks; | ||||
|       const Index col_begin = 2 * (col_block * blockDim.x * NumPerThread + threadIdx.x); | ||||
|  | ||||
|       half2 reduced_val1 = reducer.template initializePacket<half2>(); | ||||
|       half2 reduced_val2 = reducer.template initializePacket<half2>(); | ||||
|  | ||||
|       for (Index j = 0; j < NumPerThread; j += unroll_times) { | ||||
|         const Index last_col = col_begin + blockDim.x * (j + unroll_times - 1) * 2; | ||||
|         if (last_col >= num_coeffs_to_reduce) { | ||||
|           Index col = col_begin + blockDim.x * j; | ||||
|           for (; col + 1 < num_coeffs_to_reduce; col += blockDim.x) { | ||||
|             const half2 val1 = input.m_impl.template packet<Unaligned>(row * num_coeffs_to_reduce + col); | ||||
|             reducer.reducePacket(val1, &reduced_val1); | ||||
|             const half2 val2 = input.m_impl.template packet<Unaligned>((row+1) * num_coeffs_to_reduce + col); | ||||
|             reducer.reducePacket(val2, &reduced_val2); | ||||
|           } | ||||
|           if (col < num_coeffs_to_reduce) { | ||||
|             // Peel off the final unpaired column. | ||||
|             const half last1 = input.m_impl.coeff(row * num_coeffs_to_reduce + col); | ||||
|             const half2 val1 = __halves2half2(last1, reducer.initialize()); | ||||
|             reducer.reducePacket(val1, &reduced_val1); | ||||
|             const half last2 = input.m_impl.coeff((row+1) * num_coeffs_to_reduce + col); | ||||
|             const half2 val2 = __halves2half2(last2, reducer.initialize()); | ||||
|             reducer.reducePacket(val2, &reduced_val2); | ||||
|           } | ||||
|           break; | ||||
|         } else { | ||||
|           // Faster version of the loop with no branches after unrolling. | ||||
| #pragma unroll | ||||
|           for (int k = 0; k < unroll_times; ++k) { | ||||
|             const Index col = col_begin + blockDim.x * (j + k) * 2; | ||||
|             reducer.reducePacket(input.m_impl.template packet<Unaligned>(row * num_coeffs_to_reduce + col), &reduced_val1); | ||||
|             reducer.reducePacket(input.m_impl.template packet<Unaligned>((row + 1)* num_coeffs_to_reduce + col), &reduced_val2); | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|  | ||||
| #pragma unroll | ||||
|       for (int offset = warpSize/2; offset > 0; offset /= 2) { | ||||
|         reducer.reducePacket(__shfl_down(reduced_val1, offset, warpSize), &reduced_val1); | ||||
|         reducer.reducePacket(__shfl_down(reduced_val2, offset, warpSize), &reduced_val2); | ||||
|       } | ||||
|  | ||||
|       half val1 =  __low2half(reduced_val1); | ||||
|       reducer.reduce(__high2half(reduced_val1), &val1); | ||||
|       half val2 =  __low2half(reduced_val2); | ||||
|       reducer.reduce(__high2half(reduced_val2), &val2); | ||||
|       half2 val = __halves2half2(val1, val2); | ||||
|  | ||||
|       if ((threadIdx.x & (warpSize - 1)) == 0) { | ||||
|         half* loc = output + row; | ||||
|         atomicReduce((half2*)loc, val, reducer); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| #endif | ||||
|  | ||||
| template <typename Self, typename Op, typename OutputType, bool PacketAccess, typename Enabled = void> | ||||
| struct InnerReductionLauncher { | ||||
|   static EIGEN_DEVICE_FUNC bool run(const Self&, Op&, const GpuDevice&, OutputType*, typename Self::Index, typename Self::Index) { | ||||
|     assert(false && "Should only be called to reduce doubles, floats and half floats on a gpu device"); | ||||
|     return true; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| // Specialization for float and double | ||||
| template <typename Self, typename Op, typename OutputType, bool PacketAccess> | ||||
| struct InnerReductionLauncher< | ||||
|   Self, Op, OutputType, PacketAccess, | ||||
|   typename internal::enable_if< | ||||
|     internal::is_same<float, OutputType>::value || | ||||
|     internal::is_same<double, OutputType>::value, | ||||
|   void>::type> { | ||||
|   static bool run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) { | ||||
|     typedef typename Self::Index Index; | ||||
|  | ||||
|     const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals; | ||||
|     const int block_size = 256; | ||||
|     const int num_per_thread = 128; | ||||
|     const int dyn_blocks = divup<int>(num_coeffs, block_size * num_per_thread); | ||||
|     const int max_blocks = device.getNumCudaMultiProcessors() * | ||||
|                            device.maxCudaThreadsPerMultiProcessor() / block_size; | ||||
|     const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks); | ||||
|  | ||||
|     if (num_blocks > 1) { | ||||
|       // We initialize the outputs outside the reduction kernel when we can't be sure that there | ||||
|       // won't be race conditions between multiple thread blocks. | ||||
|       const int dyn_blocks = divup<int>(num_preserved_vals, 1024); | ||||
|       const int max_blocks = device.getNumCudaMultiProcessors() * | ||||
|                            device.maxCudaThreadsPerMultiProcessor() / 1024; | ||||
|       const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks); | ||||
|       LAUNCH_CUDA_KERNEL((ReductionInitKernel<OutputType, Index>), | ||||
|                          num_blocks, 1024, 0, device, reducer.initialize(), | ||||
|                          num_preserved_vals, output); | ||||
|     } | ||||
|  | ||||
|     LAUNCH_CUDA_KERNEL((InnerReductionKernel<num_per_thread, Self, Op, Index>), | ||||
|                        num_blocks, block_size, 0, device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output); | ||||
|  | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| #ifdef EIGEN_HAS_CUDA_FP16 | ||||
| template <typename Self, typename Op> | ||||
| struct InnerReductionLauncher<Self, Op, Eigen::half, false> { | ||||
|   static bool run(const Self&, Op&, const GpuDevice&, half*, typename Self::Index, typename Self::Index) { | ||||
|     assert(false && "Should not be called since there is no packet accessor"); | ||||
|     return true; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename Self, typename Op> | ||||
| struct InnerReductionLauncher<Self, Op, Eigen::half, true> { | ||||
|   static bool run(const Self& self, Op& reducer, const GpuDevice& device, half* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) { | ||||
|     typedef typename Self::Index Index; | ||||
|  | ||||
|     if (num_preserved_vals % 2 != 0) { | ||||
|       // Not supported yet, revert to the slower code path | ||||
|       return true; | ||||
|     } | ||||
|  | ||||
|     const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals; | ||||
|     const int block_size = /*256*/128; | ||||
|     const int num_per_thread = /*128*/64; | ||||
|     const int dyn_blocks = divup<int>(num_coeffs, block_size * num_per_thread); | ||||
|     const int max_blocks = device.getNumCudaMultiProcessors() * | ||||
|                            device.maxCudaThreadsPerMultiProcessor() / block_size; | ||||
|     const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks); | ||||
|  | ||||
|     if (num_blocks > 1) { | ||||
|       // We initialize the outputs outside the reduction kernel when we can't be sure that there | ||||
|       // won't be race conditions between multiple thread blocks. | ||||
|       const int dyn_blocks = divup<int>(num_preserved_vals, 1024); | ||||
|       const int max_blocks = device.getNumCudaMultiProcessors() * | ||||
|                            device.maxCudaThreadsPerMultiProcessor() / 1024; | ||||
|       const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks); | ||||
|       LAUNCH_CUDA_KERNEL((ReductionInitKernelHalfFloat<Self, Op, Index>), | ||||
|                          1, 1, 0, device, reducer, self, num_preserved_vals, output); | ||||
|     } | ||||
|  | ||||
|     LAUNCH_CUDA_KERNEL((InnerReductionKernelHalfFloat<num_per_thread, Self, Op, Index>), | ||||
|                        num_blocks, block_size, 0, device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output); | ||||
|  | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
| #endif | ||||
|  | ||||
|  | ||||
| template <typename Self, typename Op> | ||||
| struct InnerReducer<Self, Op, GpuDevice> { | ||||
|   // Unfortunately nvidia doesn't handle exotic types such as complex well, | ||||
|   // so reduce the scope of the optimized version of the code to the simple cases | ||||
|   // of floats, doubles and half floats. | ||||
| #ifdef EIGEN_HAS_CUDA_FP16 | ||||
|   static const bool HasOptimizedImplementation = !Op::IsStateful && | ||||
|       (internal::is_same<typename Self::CoeffReturnType, float>::value || | ||||
|        internal::is_same<typename Self::CoeffReturnType, double>::value || | ||||
|        (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess)); | ||||
| #else | ||||
|   static const bool HasOptimizedImplementation = !Op::IsStateful && | ||||
|                                                  (internal::is_same<typename Self::CoeffReturnType, float>::value || | ||||
|                                                   internal::is_same<typename Self::CoeffReturnType, double>::value); | ||||
| #endif | ||||
|  | ||||
|   template <typename OutputType> | ||||
|   static bool run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) { | ||||
|     assert(HasOptimizedImplementation && "Should only be called on doubles, floats or half floats"); | ||||
|     const Index num_coeffs = array_prod(self.m_impl.dimensions()); | ||||
|     // Don't crash when we're called with an input tensor of size 0. | ||||
|     if (num_coeffs == 0) { | ||||
|       return true; | ||||
|     } | ||||
|     // It's faster to use the usual code. | ||||
|     if (num_coeffs_to_reduce <= 128) { | ||||
|       return true; | ||||
|     } | ||||
|  | ||||
|     return InnerReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <int NumPerThread, typename Self, | ||||
|           typename Reducer, typename Index> | ||||
| __global__ void OuterReductionKernel(Reducer reducer, const Self input, Index num_coeffs_to_reduce, Index num_preserved_coeffs, | ||||
|                                      typename Self::CoeffReturnType* output) { | ||||
|   const Index num_threads = blockDim.x * gridDim.x; | ||||
|   const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x; | ||||
|   // Initialize the output values if they weren't initialized by the ReductionInitKernel | ||||
|   if (gridDim.x == 1) { | ||||
|     for (Index i = thread_id; i < num_preserved_coeffs; i += num_threads) { | ||||
|       output[i] = reducer.initialize(); | ||||
|     } | ||||
|     __syncthreads(); | ||||
|   } | ||||
|  | ||||
|   // Do the reduction. | ||||
|   const Index max_iter = num_preserved_coeffs * divup<Index>(num_coeffs_to_reduce, NumPerThread); | ||||
|   for (Index i = thread_id; i < max_iter; i += num_threads) { | ||||
|     const Index input_col = i % num_preserved_coeffs; | ||||
|     const Index input_row = (i / num_preserved_coeffs) * NumPerThread; | ||||
|     typename Self::CoeffReturnType reduced_val = reducer.initialize(); | ||||
|     const Index max_row = numext::mini(input_row + NumPerThread, num_coeffs_to_reduce); | ||||
|     for (Index j = input_row; j < max_row; j++) { | ||||
|       typename Self::CoeffReturnType val = input.m_impl.coeff(j * num_preserved_coeffs + input_col); | ||||
|       reducer.reduce(val, &reduced_val); | ||||
|     } | ||||
|     atomicReduce(&(output[input_col]), reduced_val, reducer); | ||||
|   } | ||||
| } | ||||
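Each work item of the outer reduction decodes its linear index into a preserved column (i % num_preserved_coeffs) and a starting row ((i / num_preserved_coeffs) * NumPerThread), so neighbouring threads read neighbouring columns and memory accesses stay coalesced. The sketch below prints that mapping for small, made-up sizes:

// Sketch of the work-item -> (input_col, input_row) decoding used by
// OuterReductionKernel above. The sizes below are hypothetical example values.
#include <algorithm>
#include <cstdio>

int main() {
  const long num_preserved_coeffs = 4;    // width of the output (columns kept)
  const long num_coeffs_to_reduce = 10;   // rows folded into each output value
  const int  NumPerThread         = 4;    // rows handled per work item

  const long iters_per_col = (num_coeffs_to_reduce + NumPerThread - 1) / NumPerThread;
  const long max_iter      = num_preserved_coeffs * iters_per_col;

  for (long i = 0; i < max_iter; ++i) {
    const long input_col = i % num_preserved_coeffs;                  // which output column
    const long input_row = (i / num_preserved_coeffs) * NumPerThread; // first row of this chunk
    const long max_row   = std::min<long>(input_row + NumPerThread, num_coeffs_to_reduce);
    std::printf("item %2ld reduces rows [%ld, %ld) of column %ld\n",
                i, input_row, max_row, input_col);
  }
  return 0;
}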
|  | ||||
|  | ||||
| template <typename Self, typename Op> | ||||
| struct OuterReducer<Self, Op, GpuDevice> { | ||||
|   // Unfortunately nvidia doesn't handle exotic types such as complex well, | ||||
|   // so reduce the scope of the optimized version of the code to the simple case | ||||
|   // of floats and doubles. | ||||
|   static const bool HasOptimizedImplementation = !Op::IsStateful && | ||||
|                                                  (internal::is_same<typename Self::CoeffReturnType, float>::value || | ||||
|                                                   internal::is_same<typename Self::CoeffReturnType, double>::value); | ||||
|   template <typename Device, typename OutputType> | ||||
|   static EIGEN_DEVICE_FUNC bool run(const Self&, Op&, const Device&, OutputType*, typename Self::Index, typename Self::Index) { | ||||
|     assert(false && "Should only be called to reduce doubles or floats on a gpu device"); | ||||
|     return true; | ||||
|   } | ||||
|  | ||||
|   static bool run(const Self& self, Op& reducer, const GpuDevice& device, float* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) { | ||||
|     typedef typename Self::Index Index; | ||||
|  | ||||
|     // It's faster to use the usual code. | ||||
|     if (num_coeffs_to_reduce <= 32) { | ||||
|       return true; | ||||
|     } | ||||
|  | ||||
|     const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals; | ||||
|     const int block_size = 256; | ||||
|     const int num_per_thread = 16; | ||||
|     const int dyn_blocks = divup<int>(num_coeffs, block_size * num_per_thread); | ||||
|     const int max_blocks = device.getNumCudaMultiProcessors() * | ||||
|                            device.maxCudaThreadsPerMultiProcessor() / block_size; | ||||
|     const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks); | ||||
|  | ||||
|     if (num_blocks > 1) { | ||||
|       // We initialize the outputs outside the reduction kernel when we can't be sure that there | ||||
|       // won't be race conditions between multiple thread blocks. | ||||
|       const int dyn_blocks = divup<int>(num_preserved_vals, 1024); | ||||
|       const int max_blocks = device.getNumCudaMultiProcessors() * | ||||
|                              device.maxCudaThreadsPerMultiProcessor() / 1024; | ||||
|       const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks); | ||||
|       LAUNCH_CUDA_KERNEL((ReductionInitKernel<float, Index>), | ||||
|                          num_blocks, 1024, 0, device, reducer.initialize(), | ||||
|                          num_preserved_vals, output); | ||||
|     } | ||||
|  | ||||
|     LAUNCH_CUDA_KERNEL((OuterReductionKernel<num_per_thread, Self, Op, Index>), | ||||
|                        num_blocks, block_size, 0, device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output); | ||||
|  | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| #endif | ||||
|  | ||||
|  | ||||
| } // end namespace internal | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_CUDA_H | ||||
							
								
								
									
external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h (242 lines, vendored, Normal file)
							| @@ -0,0 +1,242 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Mehdi Goli    Codeplay Software Ltd. | ||||
| // Ralph Potter  Codeplay Software Ltd. | ||||
| // Luke Iwanski  Codeplay Software Ltd. | ||||
| // Contact: <eigen@codeplay.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| /***************************************************************** | ||||
|  * TensorReductionSycl.h | ||||
|  * | ||||
|  * \brief: | ||||
|  *  This is the specialisation of the reduction operation for the SYCL | ||||
|  * backend | ||||
|  * | ||||
| *****************************************************************/ | ||||
|  | ||||
| #ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_REDUCTION_SYCL_HPP | ||||
| #define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_REDUCTION_SYCL_HPP | ||||
|  | ||||
| namespace Eigen { | ||||
| namespace internal { | ||||
|  | ||||
| template<typename CoeffReturnType, typename KernelName> struct syclGenericBufferReducer{ | ||||
| template<typename BufferTOut, typename BufferTIn> | ||||
| static void run(BufferTOut* bufOut, BufferTIn& bufI, const Eigen::SyclDevice& dev, size_t length, size_t local){ | ||||
|   do { | ||||
|           auto f = [length, local, bufOut, &bufI](cl::sycl::handler& h) mutable { | ||||
|             cl::sycl::nd_range<1> r{cl::sycl::range<1>{std::max(length, local)}, | ||||
|                                     cl::sycl::range<1>{std::min(length, local)}}; | ||||
|             /* Two accessors are used: one to the buffer that is being reduced, | ||||
|              * and a second to local memory, used to store intermediate data. */ | ||||
|             auto aI = | ||||
|                 bufI.template get_access<cl::sycl::access::mode::read_write>(h); | ||||
|             auto aOut = | ||||
|                 bufOut->template get_access<cl::sycl::access::mode::discard_write>(h); | ||||
|             cl::sycl::accessor<CoeffReturnType, 1, cl::sycl::access::mode::read_write, | ||||
|                                cl::sycl::access::target::local> | ||||
|                 scratch(cl::sycl::range<1>(local), h); | ||||
|  | ||||
|             /* The parallel_for invocation chosen is the variant with an nd_item | ||||
|              * parameter, since the code requires barriers for correctness. */ | ||||
|             h.parallel_for<KernelName>( | ||||
|                 r, [aOut, aI, scratch, local, length](cl::sycl::nd_item<1> id) { | ||||
|                   size_t globalid = id.get_global(0); | ||||
|                   size_t localid = id.get_local(0); | ||||
|                   /* All threads collectively read from global memory into local. | ||||
|                    * The barrier ensures all threads' IO is resolved before | ||||
|                    * execution continues (strictly speaking, all threads within | ||||
|                    * a single work-group - there is no co-ordination between | ||||
|                    * work-groups, only work-items). */ | ||||
|                   if (globalid < length) { | ||||
|                     scratch[localid] = aI[globalid]; | ||||
|                   } | ||||
|                   id.barrier(cl::sycl::access::fence_space::local_space); | ||||
|  | ||||
|                   /* Apply the reduction operation between the current local | ||||
|                    * id and the one on the other half of the vector. */ | ||||
|                   if (globalid < length) { | ||||
|                     int min = (length < local) ? length : local; | ||||
|                     for (size_t offset = min / 2; offset > 0; offset /= 2) { | ||||
|                       if (localid < offset) { | ||||
|                         scratch[localid] += scratch[localid + offset]; | ||||
|                       } | ||||
|                       id.barrier(cl::sycl::access::fence_space::local_space); | ||||
|                     } | ||||
|                     /* The final result will be stored in local id 0. */ | ||||
|                     if (localid == 0) { | ||||
|                       aI[id.get_group(0)] = scratch[localid]; | ||||
|                       if((length<=local) && globalid ==0){ | ||||
|                         aOut[globalid]=scratch[localid]; | ||||
|                       } | ||||
|                     } | ||||
|                   } | ||||
|                 }); | ||||
|           }; | ||||
|             dev.m_queue.submit(f); | ||||
|             dev.m_queue.throw_asynchronous(); | ||||
|  | ||||
|           /* At this point, you could queue::wait_and_throw() to ensure that | ||||
|            * errors are caught quickly. However, this would likely impact | ||||
|            * performance negatively. */ | ||||
|           length = length / local; | ||||
|  | ||||
|         } while (length > 1); | ||||
|  | ||||
|  | ||||
|  | ||||
| } | ||||
|  | ||||
| }; | ||||
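syclGenericBufferReducer above reduces each work-group's slice in local memory by repeatedly folding the upper half onto the lower half, with a barrier between steps, and keeps resubmitting the kernel until a single element remains. The following plain C++ sketch reproduces one such local-memory pass for a single work-group, with sum assumed as the combining operation (the function name and sizes are illustrative only):

// Host sketch of one local-memory halving pass of syclGenericBufferReducer above.
// `scratch` stands in for the work-group local accessor; the barrier between steps is
// implicit here because the loop runs sequentially.
#include <cstdio>
#include <vector>

float reduce_work_group(std::vector<float> scratch) {
  const size_t local = scratch.size();          // work-group size (power of two assumed)
  for (size_t offset = local / 2; offset > 0; offset /= 2) {
    for (size_t localid = 0; localid < offset; ++localid) {
      scratch[localid] += scratch[localid + offset];   // fold the upper half onto the lower half
    }
    // In the SYCL kernel a work-group barrier sits here so every work-item sees the
    // partial sums written in this step before the next halving begins.
  }
  return scratch[0];   // the group's partial result, written back by local id 0
}

int main() {
  std::vector<float> values(8, 1.0f);
  std::printf("group result: %f (expected 8)\n", reduce_work_group(values));
  return 0;
}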
|  | ||||
| /// For now let's start with a full reducer. | ||||
| /// Self is useless here because in expression construction we are going to treat the reduction as a leaf node. | ||||
| /// We want to take the child of the reduction, build an expression from it and apply the full reducer function to it. FullReducer applies the | ||||
| /// reduction operation to the child of the reduction. Once that is done the reduction is an empty shell that can be thrown away and treated as | ||||
| /// a leaf node. | ||||
| template <typename Self, typename Op, bool Vectorizable> | ||||
| struct FullReducer<Self, Op, const Eigen::SyclDevice, Vectorizable> { | ||||
|  | ||||
|   typedef typename Self::CoeffReturnType CoeffReturnType; | ||||
|   static const bool HasOptimizedImplementation = false; | ||||
|  | ||||
|   static void run(const Self& self, Op& reducer, const Eigen::SyclDevice& dev, CoeffReturnType* output) { | ||||
|     typedef const typename Self::ChildType HostExpr; /// this is the child of reduction | ||||
|     typedef  typename TensorSycl::internal::createPlaceHolderExpression<HostExpr>::Type PlaceHolderExpr; | ||||
|     auto functors = TensorSycl::internal::extractFunctors(self.impl()); | ||||
|     int red_factor = 256; /// initial reduction factor. If the size is less than red_factor we only create one thread. | ||||
|     size_t inputSize = self.impl().dimensions().TotalSize(); | ||||
|     size_t rng = inputSize/red_factor; // the initial number of threads is inputSize / red_factor | ||||
|     size_t remaining = inputSize % red_factor; | ||||
|     if(rng ==0) { | ||||
|       red_factor=1; | ||||
|     }; | ||||
|     size_t tileSize =dev.m_queue.get_device(). template get_info<cl::sycl::info::device::max_work_group_size>()/2; | ||||
|     size_t GRange=std::max((size_t )1, rng); | ||||
|  | ||||
|     // round the global range up to the next power of 2 for the reduction | ||||
|     // (a standalone sketch of this rounding trick follows after this struct) | ||||
|     GRange--; | ||||
|     GRange |= GRange >> 1; | ||||
|     GRange |= GRange >> 2; | ||||
|     GRange |= GRange >> 4; | ||||
|     GRange |= GRange >> 8; | ||||
|     GRange |= GRange >> 16; | ||||
| #if __x86_64__ || __ppc64__ || _WIN64 | ||||
|     GRange |= GRange >> 32; | ||||
| #endif | ||||
|     GRange++; | ||||
|     size_t  outTileSize = tileSize; | ||||
|     /// If GRange is smaller than the tile size, shrink the tile to GRange so that a single work-group (and a single recursion step) reduces everything to one element. | ||||
|     if (GRange < outTileSize) outTileSize=GRange; | ||||
|     // Get the final output buffer; it is created directly here because there is no need for an assign step. | ||||
|     auto out_buffer =dev.template get_sycl_buffer<typename Eigen::internal::remove_all<CoeffReturnType>::type>(self.dimensions().TotalSize(), output); | ||||
|     /// Create the temporary global buffer for the reduction. | ||||
|     /// It collects the partial results of every work-group, since we don't have a global barrier on the GPU. Once they are saved we can | ||||
|     /// recursively apply the reduction to this buffer in order to reduce the whole tensor. | ||||
|     auto temp_global_buffer =cl::sycl::buffer<CoeffReturnType, 1>(cl::sycl::range<1>(GRange)); | ||||
|     typedef typename Eigen::internal::remove_all<decltype(self.xprDims())>::type Dims; | ||||
|     Dims dims= self.xprDims(); | ||||
|     Op functor = reducer; | ||||
|     dev.m_queue.submit([&](cl::sycl::handler &cgh) { | ||||
|       // create a tuple of accessors from Evaluator | ||||
|       auto tuple_of_accessors =  TensorSycl::internal::createTupleOfAccessors(cgh, self.impl()); | ||||
|       auto tmp_global_accessor = temp_global_buffer. template get_access<cl::sycl::access::mode::read_write, cl::sycl::access::target::global_buffer>(cgh); | ||||
|  | ||||
|       cgh.parallel_for<PlaceHolderExpr>( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(outTileSize)), [=](cl::sycl::nd_item<1> itemID) { | ||||
|         typedef typename TensorSycl::internal::ConvertToDeviceExpression<const HostExpr>::Type DevExpr; | ||||
|         auto device_expr = TensorSycl::internal::createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors); | ||||
|         /// The reduction cannot be captured automatically through our device conversion recursion. The reason is that a reduction has two behaviours: | ||||
|         /// the first is when it is used as a root to launch the sub-kernel, the second is when it is treated as a leaf node that passes its | ||||
|         /// calculated result to its parent kernel. The latter is detected automatically by our device expression generator; the former is created here. | ||||
|         const auto device_self_expr= TensorReductionOp<Op, Dims, decltype(device_expr.expr) ,MakeGlobalPointer>(device_expr.expr, dims, functor); | ||||
|         /// This is the evaluator for device_self_expr. It is exactly like the self that was passed to the run function, except that | ||||
|         /// the device evaluator is detectable and recognisable on the device. | ||||
|         auto device_self_evaluator = Eigen::TensorEvaluator<decltype(device_self_expr), Eigen::DefaultDevice>(device_self_expr, Eigen::DefaultDevice()); | ||||
|         /// const cast added as a naive solution to solve the qualifier drop error | ||||
|         auto globalid=itemID.get_global_linear_id(); | ||||
|  | ||||
|         if(globalid<rng) | ||||
|           tmp_global_accessor.get_pointer()[globalid]=InnerMostDimReducer<decltype(device_self_evaluator), Op, false>::reduce(device_self_evaluator, red_factor*globalid, red_factor, const_cast<Op&>(functor)); | ||||
|         else | ||||
|           tmp_global_accessor.get_pointer()[globalid]=static_cast<CoeffReturnType>(0); | ||||
|  | ||||
|         if(remaining!=0 && globalid==0 ) | ||||
|           // this adds the rest of the input buffer when the input size is not divisible by red_factor. | ||||
|           tmp_global_accessor.get_pointer()[globalid]+=InnerMostDimReducer<decltype(device_self_evaluator), Op, false>::reduce(device_self_evaluator, red_factor*(rng), remaining, const_cast<Op&>(functor)); | ||||
|       }); | ||||
|     }); | ||||
|   dev.m_queue.throw_asynchronous(); | ||||
|  | ||||
| /// This recursively reduces the temporary buffer down to a single element. | ||||
|   syclGenericBufferReducer<CoeffReturnType,HostExpr>::run(out_buffer, temp_global_buffer,dev, GRange,  outTileSize); | ||||
|   } | ||||
|  | ||||
| }; | ||||
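The GRange manipulation above rounds the global range up to the next power of two by smearing the highest set bit down through every lower position and then adding one. A standalone sketch of the same trick (the next_pow2 name is just an illustrative label):

// Standalone sketch of the bit-smearing trick used above to round the global range
// up to the next power of two: decrement, OR in successively shifted copies so every
// bit below the highest set bit becomes 1, then increment.
#include <cstdio>

size_t next_pow2(size_t v) {
  v--;
  v |= v >> 1;
  v |= v >> 2;
  v |= v >> 4;
  v |= v >> 8;
  v |= v >> 16;
#if __x86_64__ || __ppc64__ || _WIN64
  v |= v >> 32;            // only needed when size_t is 64 bits wide
#endif
  return ++v;
}

int main() {
  for (size_t x : {1u, 5u, 64u, 1000u}) {
    std::printf("%zu -> %zu\n", x, next_pow2(x));   // 1, 8, 64, 1024
  }
  return 0;
}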
|  | ||||
| template <typename Self, typename Op> | ||||
| struct InnerReducer<Self, Op, const Eigen::SyclDevice> { | ||||
|  | ||||
|   typedef typename Self::CoeffReturnType CoeffReturnType; | ||||
|   static const bool HasOptimizedImplementation = false; | ||||
|  | ||||
|   static bool run(const Self& self, Op& reducer, const Eigen::SyclDevice& dev, CoeffReturnType* output, typename Self::Index , typename Self::Index num_coeffs_to_preserve) { | ||||
|     typedef const typename Self::ChildType HostExpr; /// this is the child of reduction | ||||
|     typedef  typename TensorSycl::internal::createPlaceHolderExpression<HostExpr>::Type PlaceHolderExpr; | ||||
|     auto functors = TensorSycl::internal::extractFunctors(self.impl()); | ||||
|  | ||||
|     size_t tileSize =dev.m_queue.get_device(). template get_info<cl::sycl::info::device::max_work_group_size>()/2; | ||||
|  | ||||
|     size_t GRange=num_coeffs_to_preserve; | ||||
|     if (tileSize>GRange) tileSize=GRange; | ||||
|     else if(GRange>tileSize){ | ||||
|       size_t xMode = GRange % tileSize; | ||||
|       if (xMode != 0) GRange += (tileSize - xMode); | ||||
|     } | ||||
|     // Get the final output buffer; it is created directly here because there is no need for an assign step. | ||||
|     /// Create the shared memory for calculating the reduction. | ||||
|     /// It collects the partial results of every work-group, since we don't have a global barrier on the GPU. Once they are saved we can | ||||
|     /// recursively apply the reduction to it in order to reduce the whole. | ||||
|     typedef typename Eigen::internal::remove_all<decltype(self.xprDims())>::type Dims; | ||||
|     Dims dims= self.xprDims(); | ||||
|     Op functor = reducer; | ||||
|  | ||||
|     dev.m_queue.submit([&](cl::sycl::handler &cgh) { | ||||
|       // create a tuple of accessors from Evaluator | ||||
|       auto tuple_of_accessors =  TensorSycl::internal::createTupleOfAccessors(cgh, self.impl()); | ||||
|       auto output_accessor = dev.template get_sycl_accessor<cl::sycl::access::mode::discard_write>(num_coeffs_to_preserve,cgh, output); | ||||
|  | ||||
|       cgh.parallel_for<Self>( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), [=](cl::sycl::nd_item<1> itemID) { | ||||
|         typedef typename TensorSycl::internal::ConvertToDeviceExpression<const HostExpr>::Type DevExpr; | ||||
|         auto device_expr = TensorSycl::internal::createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors); | ||||
|         /// The reduction cannot be captured automatically through our device conversion recursion. The reason is that a reduction has two behaviours: | ||||
|         /// the first is when it is used as a root to launch the sub-kernel, the second is when it is treated as a leaf node that passes its | ||||
|         /// calculated result to its parent kernel. The latter is detected automatically by our device expression generator; the former is created here. | ||||
|         const auto device_self_expr= TensorReductionOp<Op, Dims, decltype(device_expr.expr) ,MakeGlobalPointer>(device_expr.expr, dims, functor); | ||||
|         /// This is the evaluator for device_self_expr. It is exactly like the self that was passed to the run function, except that | ||||
|         /// the device evaluator is detectable and recognisable on the device. | ||||
|         typedef Eigen::TensorEvaluator<decltype(device_self_expr), Eigen::DefaultDevice> DeiceSelf; | ||||
|         auto device_self_evaluator = Eigen::TensorEvaluator<decltype(device_self_expr), Eigen::DefaultDevice>(device_self_expr, Eigen::DefaultDevice()); | ||||
|         /// const cast added as a naive solution to solve the qualifier drop error | ||||
|         auto globalid=itemID.get_global_linear_id(); | ||||
|         if (globalid< static_cast<size_t>(num_coeffs_to_preserve)) { | ||||
|           typename DeiceSelf::CoeffReturnType accum = functor.initialize(); | ||||
|           GenericDimReducer<DeiceSelf::NumReducedDims-1, DeiceSelf, Op>::reduce(device_self_evaluator, device_self_evaluator.firstInput(globalid),const_cast<Op&>(functor), &accum); | ||||
|           functor.finalize(accum); | ||||
|           output_accessor.get_pointer()[globalid]= accum; | ||||
|         } | ||||
|       }); | ||||
|     }); | ||||
|     dev.m_queue.throw_asynchronous(); | ||||
|     return false; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
| }  // namespace Eigen | ||||
|  | ||||
| #endif  // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_REDUCTION_SYCL_HPP | ||||
							
								
								
									
429 external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h (vendored, Normal file)
							| @@ -0,0 +1,429 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_REF_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_REF_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| namespace internal { | ||||
|  | ||||
| template <typename Dimensions, typename Scalar> | ||||
| class TensorLazyBaseEvaluator { | ||||
|  public: | ||||
|   TensorLazyBaseEvaluator() : m_refcount(0) { } | ||||
|   virtual ~TensorLazyBaseEvaluator() { } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC virtual const Dimensions& dimensions() const = 0; | ||||
|   EIGEN_DEVICE_FUNC virtual const Scalar* data() const = 0; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC virtual const Scalar coeff(DenseIndex index) const = 0; | ||||
|   EIGEN_DEVICE_FUNC virtual Scalar& coeffRef(DenseIndex index) = 0; | ||||
|  | ||||
|   void incrRefCount() { ++m_refcount; } | ||||
|   void decrRefCount() { --m_refcount; } | ||||
|   int refCount() const { return m_refcount; } | ||||
|  | ||||
|  private: | ||||
|   // No copy, no assignment. | ||||
|   TensorLazyBaseEvaluator(const TensorLazyBaseEvaluator& other); | ||||
|   TensorLazyBaseEvaluator& operator = (const TensorLazyBaseEvaluator& other); | ||||
|  | ||||
|   int m_refcount; | ||||
| }; | ||||
|  | ||||
|  | ||||
| template <typename Dimensions, typename Expr, typename Device> | ||||
| class TensorLazyEvaluatorReadOnly : public TensorLazyBaseEvaluator<Dimensions, typename TensorEvaluator<Expr, Device>::Scalar> { | ||||
|  public: | ||||
|   //  typedef typename TensorEvaluator<Expr, Device>::Dimensions Dimensions; | ||||
|   typedef typename TensorEvaluator<Expr, Device>::Scalar Scalar; | ||||
|  | ||||
|   TensorLazyEvaluatorReadOnly(const Expr& expr, const Device& device) : m_impl(expr, device), m_dummy(Scalar(0)) { | ||||
|     m_dims = m_impl.dimensions(); | ||||
|     m_impl.evalSubExprsIfNeeded(NULL); | ||||
|   } | ||||
|   virtual ~TensorLazyEvaluatorReadOnly() { | ||||
|     m_impl.cleanup(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC virtual const Dimensions& dimensions() const { | ||||
|     return m_dims; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC virtual const Scalar* data() const { | ||||
|     return m_impl.data(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC virtual const Scalar coeff(DenseIndex index) const { | ||||
|     return m_impl.coeff(index); | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC virtual Scalar& coeffRef(DenseIndex /*index*/) { | ||||
|     eigen_assert(false && "can't reference the coefficient of an rvalue"); | ||||
|     return m_dummy; | ||||
|   } | ||||
|  | ||||
|  protected: | ||||
|   TensorEvaluator<Expr, Device> m_impl; | ||||
|   Dimensions m_dims; | ||||
|   Scalar m_dummy; | ||||
| }; | ||||
|  | ||||
| template <typename Dimensions, typename Expr, typename Device> | ||||
| class TensorLazyEvaluatorWritable : public TensorLazyEvaluatorReadOnly<Dimensions, Expr, Device> { | ||||
|  public: | ||||
|   typedef TensorLazyEvaluatorReadOnly<Dimensions, Expr, Device> Base; | ||||
|   typedef typename Base::Scalar Scalar; | ||||
|  | ||||
|   TensorLazyEvaluatorWritable(const Expr& expr, const Device& device) : Base(expr, device) { | ||||
|   } | ||||
|   virtual ~TensorLazyEvaluatorWritable() { | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC virtual Scalar& coeffRef(DenseIndex index) { | ||||
|     return this->m_impl.coeffRef(index); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename Dimensions, typename Expr, typename Device> | ||||
| class TensorLazyEvaluator : public internal::conditional<bool(internal::is_lvalue<Expr>::value), | ||||
|                             TensorLazyEvaluatorWritable<Dimensions, Expr, Device>, | ||||
|                             TensorLazyEvaluatorReadOnly<Dimensions, const Expr, Device> >::type { | ||||
|  public: | ||||
|   typedef typename internal::conditional<bool(internal::is_lvalue<Expr>::value), | ||||
|                                          TensorLazyEvaluatorWritable<Dimensions, Expr, Device>, | ||||
|                                          TensorLazyEvaluatorReadOnly<Dimensions, const Expr, Device> >::type Base; | ||||
|   typedef typename Base::Scalar Scalar; | ||||
|  | ||||
|   TensorLazyEvaluator(const Expr& expr, const Device& device) : Base(expr, device) { | ||||
|   } | ||||
|   virtual ~TensorLazyEvaluator() { | ||||
|   } | ||||
| }; | ||||
|  | ||||
| }  // namespace internal | ||||
|  | ||||
|  | ||||
| /** \class TensorRef | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief A reference to a tensor expression | ||||
|   * The expression will be evaluated lazily (as much as possible). | ||||
|   * | ||||
|   */ | ||||
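| // Usage sketch (illustrative only; the tensor names below are placeholders): wrapping an expression in a | ||||
| // TensorRef defers its evaluation, so only the coefficients that are actually read get computed. | ||||
| // | ||||
| //   Eigen::Tensor<float, 3> a(6, 5, 3), b(6, 5, 3); | ||||
| //   a.setRandom(); b.setRandom(); | ||||
| //   Eigen::TensorRef<Eigen::Tensor<float, 3> > ref = a * 0.5f + b; | ||||
| //   float v = ref(1, 2, 0);  // evaluates just this coefficient | ||||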
| template<typename PlainObjectType> class TensorRef : public TensorBase<TensorRef<PlainObjectType> > | ||||
| { | ||||
|   public: | ||||
|     typedef TensorRef<PlainObjectType> Self; | ||||
|     typedef typename PlainObjectType::Base Base; | ||||
|     typedef typename Eigen::internal::nested<Self>::type Nested; | ||||
|     typedef typename internal::traits<PlainObjectType>::StorageKind StorageKind; | ||||
|     typedef typename internal::traits<PlainObjectType>::Index Index; | ||||
|     typedef typename internal::traits<PlainObjectType>::Scalar Scalar; | ||||
|     typedef typename NumTraits<Scalar>::Real RealScalar; | ||||
|     typedef typename Base::CoeffReturnType CoeffReturnType; | ||||
|     typedef Scalar* PointerType; | ||||
|     typedef PointerType PointerArgType; | ||||
|  | ||||
|     static const Index NumIndices = PlainObjectType::NumIndices; | ||||
|     typedef typename PlainObjectType::Dimensions Dimensions; | ||||
|  | ||||
|     enum { | ||||
|       IsAligned = false, | ||||
|       PacketAccess = false, | ||||
|       Layout = PlainObjectType::Layout, | ||||
|       CoordAccess = false,  // to be implemented | ||||
|       RawAccess = false | ||||
|     }; | ||||
|  | ||||
|     EIGEN_STRONG_INLINE TensorRef() : m_evaluator(NULL) { | ||||
|     } | ||||
|  | ||||
|     template <typename Expression> | ||||
|     EIGEN_STRONG_INLINE TensorRef(const Expression& expr) : m_evaluator(new internal::TensorLazyEvaluator<Dimensions, Expression, DefaultDevice>(expr, DefaultDevice())) { | ||||
|       m_evaluator->incrRefCount(); | ||||
|     } | ||||
|  | ||||
|     template <typename Expression> | ||||
|     EIGEN_STRONG_INLINE TensorRef& operator = (const Expression& expr) { | ||||
|       unrefEvaluator(); | ||||
|       m_evaluator = new internal::TensorLazyEvaluator<Dimensions, Expression, DefaultDevice>(expr, DefaultDevice()); | ||||
|       m_evaluator->incrRefCount(); | ||||
|       return *this; | ||||
|     } | ||||
|  | ||||
|     ~TensorRef() { | ||||
|       unrefEvaluator(); | ||||
|     } | ||||
|  | ||||
|     TensorRef(const TensorRef& other) : m_evaluator(other.m_evaluator) { | ||||
|       eigen_assert(m_evaluator->refCount() > 0); | ||||
|       m_evaluator->incrRefCount(); | ||||
|     } | ||||
|  | ||||
|     TensorRef& operator = (const TensorRef& other) { | ||||
|       if (this != &other) { | ||||
|         unrefEvaluator(); | ||||
|         m_evaluator = other.m_evaluator; | ||||
|         eigen_assert(m_evaluator->refCount() > 0); | ||||
|         m_evaluator->incrRefCount(); | ||||
|       } | ||||
|       return *this; | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Index rank() const { return m_evaluator->dimensions().size(); } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Index dimension(Index n) const { return m_evaluator->dimensions()[n]; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_evaluator->dimensions(); } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Index size() const { return m_evaluator->dimensions().TotalSize(); } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar* data() const { return m_evaluator->data(); } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar operator()(Index index) const | ||||
|     { | ||||
|       return m_evaluator->coeff(index); | ||||
|     } | ||||
|  | ||||
| #if EIGEN_HAS_VARIADIC_TEMPLATES | ||||
|     template<typename... IndexTypes> EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar operator()(Index firstIndex, IndexTypes... otherIndices) const | ||||
|     { | ||||
|       const std::size_t num_indices = (sizeof...(otherIndices) + 1); | ||||
|       const array<Index, num_indices> indices{{firstIndex, otherIndices...}}; | ||||
|       return coeff(indices); | ||||
|     } | ||||
|     template<typename... IndexTypes> EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Scalar& coeffRef(Index firstIndex, IndexTypes... otherIndices) | ||||
|     { | ||||
|       const std::size_t num_indices = (sizeof...(otherIndices) + 1); | ||||
|       const array<Index, num_indices> indices{{firstIndex, otherIndices...}}; | ||||
|       return coeffRef(indices); | ||||
|     } | ||||
| #else | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1) const | ||||
|     { | ||||
|       array<Index, 2> indices; | ||||
|       indices[0] = i0; | ||||
|       indices[1] = i1; | ||||
|       return coeff(indices); | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1, Index i2) const | ||||
|     { | ||||
|       array<Index, 3> indices; | ||||
|       indices[0] = i0; | ||||
|       indices[1] = i1; | ||||
|       indices[2] = i2; | ||||
|       return coeff(indices); | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1, Index i2, Index i3) const | ||||
|     { | ||||
|       array<Index, 4> indices; | ||||
|       indices[0] = i0; | ||||
|       indices[1] = i1; | ||||
|       indices[2] = i2; | ||||
|       indices[3] = i3; | ||||
|       return coeff(indices); | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const | ||||
|     { | ||||
|       array<Index, 5> indices; | ||||
|       indices[0] = i0; | ||||
|       indices[1] = i1; | ||||
|       indices[2] = i2; | ||||
|       indices[3] = i3; | ||||
|       indices[4] = i4; | ||||
|       return coeff(indices); | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Scalar& coeffRef(Index i0, Index i1) | ||||
|     { | ||||
|       array<Index, 2> indices; | ||||
|       indices[0] = i0; | ||||
|       indices[1] = i1; | ||||
|       return coeffRef(indices); | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Scalar& coeffRef(Index i0, Index i1, Index i2) | ||||
|     { | ||||
|       array<Index, 3> indices; | ||||
|       indices[0] = i0; | ||||
|       indices[1] = i1; | ||||
|       indices[2] = i2; | ||||
|       return coeffRef(indices); | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Scalar& coeffRef(Index i0, Index i1, Index i2, Index i3) | ||||
|     { | ||||
|       array<Index, 4> indices; | ||||
|       indices[0] = i0; | ||||
|       indices[1] = i1; | ||||
|       indices[2] = i2; | ||||
|       indices[3] = i3; | ||||
|       return coeffRef(indices); | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Scalar& coeffRef(Index i0, Index i1, Index i2, Index i3, Index i4) | ||||
|     { | ||||
|       array<Index, 5> indices; | ||||
|       indices[0] = i0; | ||||
|       indices[1] = i1; | ||||
|       indices[2] = i2; | ||||
|       indices[3] = i3; | ||||
|       indices[4] = i4; | ||||
|       return coeffRef(indices); | ||||
|     } | ||||
| #endif | ||||
|  | ||||
|     template <std::size_t NumIndices> EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar coeff(const array<Index, NumIndices>& indices) const | ||||
|     { | ||||
|       const Dimensions& dims = this->dimensions(); | ||||
|       Index index = 0; | ||||
|       if (PlainObjectType::Options & RowMajor) { | ||||
|         index += indices[0]; | ||||
|         for (size_t i = 1; i < NumIndices; ++i) { | ||||
|           index = index * dims[i] + indices[i]; | ||||
|         } | ||||
|       } else { | ||||
|         index += indices[NumIndices-1]; | ||||
|         for (int i = NumIndices-2; i >= 0; --i) { | ||||
|           index = index * dims[i] + indices[i]; | ||||
|         } | ||||
|       } | ||||
|       return m_evaluator->coeff(index); | ||||
|     } | ||||
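|     // Note on the linearization used by coeff() and coeffRef() with an index array (illustrative numbers): | ||||
|     // for dims {3, 4, 5} and indices {1, 2, 3}, the RowMajor branch yields (1 * 4 + 2) * 5 + 3 == 33, while | ||||
|     // the ColMajor branch yields (3 * 4 + 2) * 3 + 1 == 43, i.e. i0 + d0 * (i1 + d1 * i2). | ||||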
|     template <std::size_t NumIndices> EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Scalar& coeffRef(const array<Index, NumIndices>& indices) | ||||
|     { | ||||
|       const Dimensions& dims = this->dimensions(); | ||||
|       Index index = 0; | ||||
|       if (PlainObjectType::Options & RowMajor) { | ||||
|         index += indices[0]; | ||||
|         for (size_t i = 1; i < NumIndices; ++i) { | ||||
|           index = index * dims[i] + indices[i]; | ||||
|         } | ||||
|       } else { | ||||
|         index += indices[NumIndices-1]; | ||||
|         for (int i = NumIndices-2; i >= 0; --i) { | ||||
|           index = index * dims[i] + indices[i]; | ||||
|         } | ||||
|       } | ||||
|       return m_evaluator->coeffRef(index); | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE const Scalar coeff(Index index) const | ||||
|     { | ||||
|       return m_evaluator->coeff(index); | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) | ||||
|     { | ||||
|       return m_evaluator->coeffRef(index); | ||||
|     } | ||||
|  | ||||
|   private: | ||||
|     EIGEN_STRONG_INLINE void unrefEvaluator() { | ||||
|       if (m_evaluator) { | ||||
|         m_evaluator->decrRefCount(); | ||||
|         if (m_evaluator->refCount() == 0) { | ||||
|           delete m_evaluator; | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|  | ||||
|   internal::TensorLazyBaseEvaluator<Dimensions, Scalar>* m_evaluator; | ||||
| }; | ||||
|  | ||||
|  | ||||
| // evaluator for rvalues | ||||
| template<typename Derived, typename Device> | ||||
| struct TensorEvaluator<const TensorRef<Derived>, Device> | ||||
| { | ||||
|   typedef typename Derived::Index Index; | ||||
|   typedef typename Derived::Scalar Scalar; | ||||
|   typedef typename Derived::Scalar CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   typedef typename Derived::Dimensions Dimensions; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = false, | ||||
|     PacketAccess = false, | ||||
|     Layout = TensorRef<Derived>::Layout, | ||||
|     CoordAccess = false,  // to be implemented | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const TensorRef<Derived>& m, const Device&) | ||||
|       : m_ref(m) | ||||
|   { } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_ref.dimensions(); } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { | ||||
|     return true; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { | ||||
|     return m_ref.coeff(index); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { | ||||
|     return m_ref.coeffRef(index); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC Scalar* data() const { return m_ref.data(); } | ||||
|  | ||||
|  protected: | ||||
|   TensorRef<Derived> m_ref; | ||||
| }; | ||||
|  | ||||
|  | ||||
| // evaluator for lvalues | ||||
| template<typename Derived, typename Device> | ||||
| struct TensorEvaluator<TensorRef<Derived>, Device> : public TensorEvaluator<const TensorRef<Derived>, Device> | ||||
| { | ||||
|   typedef typename Derived::Index Index; | ||||
|   typedef typename Derived::Scalar Scalar; | ||||
|   typedef typename Derived::Scalar CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   typedef typename Derived::Dimensions Dimensions; | ||||
|  | ||||
|   typedef TensorEvaluator<const TensorRef<Derived>, Device> Base; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = false, | ||||
|     PacketAccess = false, | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(TensorRef<Derived>& m, const Device& d) : Base(m, d) | ||||
|   { } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { | ||||
|     return this->m_ref.coeffRef(index); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_REF_H | ||||
							
								
								
									
288 external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h (vendored, Normal file)
							| @@ -0,0 +1,288 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Navdeep Jaitly <ndjaitly@google.com> | ||||
| //                    Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \class TensorReverse | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Tensor reverse elements class. | ||||
|   * | ||||
|   */ | ||||
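| // Usage sketch (illustrative; names are placeholders): TensorReverseOp is normally created through | ||||
| // TensorBase::reverse(), which flips the tensor along the dimensions flagged true. | ||||
| // | ||||
| //   Eigen::Tensor<float, 2> m(3, 4); | ||||
| //   m.setRandom(); | ||||
| //   Eigen::array<bool, 2> rev{{true, false}}; | ||||
| //   Eigen::Tensor<float, 2> r = m.reverse(rev);  // r(i, j) == m(2 - i, j) | ||||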
| namespace internal { | ||||
| template<typename ReverseDimensions, typename XprType> | ||||
| struct traits<TensorReverseOp<ReverseDimensions, | ||||
|                               XprType> > : public traits<XprType> | ||||
| { | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef traits<XprType> XprTraits; | ||||
|   typedef typename XprTraits::StorageKind StorageKind; | ||||
|   typedef typename XprTraits::Index Index; | ||||
|   typedef typename XprType::Nested Nested; | ||||
|   typedef typename remove_reference<Nested>::type _Nested; | ||||
|   static const int NumDimensions = XprTraits::NumDimensions; | ||||
|   static const int Layout = XprTraits::Layout; | ||||
| }; | ||||
|  | ||||
| template<typename ReverseDimensions, typename XprType> | ||||
| struct eval<TensorReverseOp<ReverseDimensions, XprType>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorReverseOp<ReverseDimensions, XprType>& type; | ||||
| }; | ||||
|  | ||||
| template<typename ReverseDimensions, typename XprType> | ||||
| struct nested<TensorReverseOp<ReverseDimensions, XprType>, 1, | ||||
|             typename eval<TensorReverseOp<ReverseDimensions, XprType> >::type> | ||||
| { | ||||
|   typedef TensorReverseOp<ReverseDimensions, XprType> type; | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
| template<typename ReverseDimensions, typename XprType> | ||||
| class TensorReverseOp : public TensorBase<TensorReverseOp<ReverseDimensions, | ||||
|                                           XprType>, WriteAccessors> | ||||
| { | ||||
|   public: | ||||
|   typedef typename Eigen::internal::traits<TensorReverseOp>::Scalar Scalar; | ||||
|   typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename Eigen::internal::nested<TensorReverseOp>::type Nested; | ||||
|   typedef typename Eigen::internal::traits<TensorReverseOp>::StorageKind | ||||
|                                                                     StorageKind; | ||||
|   typedef typename Eigen::internal::traits<TensorReverseOp>::Index Index; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReverseOp( | ||||
|       const XprType& expr, const ReverseDimensions& reverse_dims) | ||||
|       : m_xpr(expr), m_reverse_dims(reverse_dims) { } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const ReverseDimensions& reverse() const { return m_reverse_dims; } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const typename internal::remove_all<typename XprType::Nested>::type& | ||||
|     expression() const { return m_xpr; } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE TensorReverseOp& operator = (const TensorReverseOp& other) | ||||
|     { | ||||
|       typedef TensorAssignOp<TensorReverseOp, const TensorReverseOp> Assign; | ||||
|       Assign assign(*this, other); | ||||
|       internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); | ||||
|       return *this; | ||||
|     } | ||||
|  | ||||
|     template<typename OtherDerived> | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE TensorReverseOp& operator = (const OtherDerived& other) | ||||
|     { | ||||
|       typedef TensorAssignOp<TensorReverseOp, const OtherDerived> Assign; | ||||
|       Assign assign(*this, other); | ||||
|       internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); | ||||
|       return *this; | ||||
|     } | ||||
|  | ||||
|   protected: | ||||
|     typename XprType::Nested m_xpr; | ||||
|     const ReverseDimensions m_reverse_dims; | ||||
| }; | ||||
|  | ||||
| // Eval as rvalue | ||||
| template<typename ReverseDimensions, typename ArgType, typename Device> | ||||
| struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device> | ||||
| { | ||||
|   typedef TensorReverseOp<ReverseDimensions, ArgType> XprType; | ||||
|   typedef typename XprType::Index Index; | ||||
|   static const int NumDims = internal::array_size<ReverseDimensions>::value; | ||||
|   typedef DSizes<Index, NumDims> Dimensions; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = false, | ||||
|     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, | ||||
|     Layout = TensorEvaluator<ArgType, Device>::Layout, | ||||
|     CoordAccess = false,  // to be implemented | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, | ||||
|                                                         const Device& device) | ||||
|       : m_impl(op.expression(), device), m_reverse(op.reverse()) | ||||
|   { | ||||
|     // Reversing a scalar isn't supported yet. It would be a no-op anyway. | ||||
|     EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|  | ||||
|     // Compute strides | ||||
|     m_dimensions = m_impl.dimensions(); | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       m_strides[0] = 1; | ||||
|       for (int i = 1; i < NumDims; ++i) { | ||||
|         m_strides[i] = m_strides[i-1] * m_dimensions[i-1]; | ||||
|       } | ||||
|     } else { | ||||
|       m_strides[NumDims-1] = 1; | ||||
|       for (int i = NumDims - 2; i >= 0; --i) { | ||||
|         m_strides[i] = m_strides[i+1] * m_dimensions[i+1]; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   const Dimensions& dimensions() const { return m_dimensions; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { | ||||
|     m_impl.evalSubExprsIfNeeded(NULL); | ||||
|     return true; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { | ||||
|     m_impl.cleanup(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index reverseIndex( | ||||
|       Index index) const { | ||||
|     eigen_assert(index < dimensions().TotalSize()); | ||||
|     Index inputIndex = 0; | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       for (int i = NumDims - 1; i > 0; --i) { | ||||
|         Index idx = index / m_strides[i]; | ||||
|         index -= idx * m_strides[i]; | ||||
|         if (m_reverse[i]) { | ||||
|           idx = m_dimensions[i] - idx - 1; | ||||
|         } | ||||
|         inputIndex += idx * m_strides[i]; | ||||
|       } | ||||
|       if (m_reverse[0]) { | ||||
|         inputIndex += (m_dimensions[0] - index - 1); | ||||
|       } else { | ||||
|         inputIndex += index; | ||||
|       } | ||||
|     } else { | ||||
|       for (int i = 0; i < NumDims - 1; ++i) { | ||||
|         Index idx = index / m_strides[i]; | ||||
|         index -= idx * m_strides[i]; | ||||
|         if (m_reverse[i]) { | ||||
|           idx = m_dimensions[i] - idx - 1; | ||||
|         } | ||||
|         inputIndex += idx * m_strides[i]; | ||||
|       } | ||||
|       if (m_reverse[NumDims-1]) { | ||||
|         inputIndex += (m_dimensions[NumDims-1] - index - 1); | ||||
|       } else { | ||||
|         inputIndex += index; | ||||
|       } | ||||
|     } | ||||
|     return inputIndex; | ||||
|   } | ||||
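|   // Example of the mapping above (illustrative): for ColMajor dims (3, 4) with m_reverse == {true, false}, | ||||
|   // output index 5 decomposes into (i0, i1) == (2, 1); reversing dim 0 gives (0, 1), i.e. input index 0 + 1 * 3 == 3. | ||||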
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff( | ||||
|       Index index) const  { | ||||
|     return m_impl.coeff(reverseIndex(index)); | ||||
|   } | ||||
|  | ||||
|   template<int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   PacketReturnType packet(Index index) const | ||||
|   { | ||||
|     EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); | ||||
|  | ||||
|     // TODO(ndjaitly): write a better packing routine that uses | ||||
|     // local structure. | ||||
|     EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type | ||||
|                                                             values[PacketSize]; | ||||
|     for (int i = 0; i < PacketSize; ++i) { | ||||
|       values[i] = coeff(index+i); | ||||
|     } | ||||
|     PacketReturnType rslt = internal::pload<PacketReturnType>(values); | ||||
|     return rslt; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { | ||||
|     double compute_cost = NumDims * (2 * TensorOpCost::AddCost<Index>() + | ||||
|                                      2 * TensorOpCost::MulCost<Index>() + | ||||
|                                      TensorOpCost::DivCost<Index>()); | ||||
|     for (int i = 0; i < NumDims; ++i) { | ||||
|       if (m_reverse[i]) { | ||||
|         compute_cost += 2 * TensorOpCost::AddCost<Index>(); | ||||
|       } | ||||
|     } | ||||
|     return m_impl.costPerCoeff(vectorized) + | ||||
|            TensorOpCost(0, 0, compute_cost, false /* vectorized */, PacketSize); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } | ||||
|  | ||||
|  protected: | ||||
|   Dimensions m_dimensions; | ||||
|   array<Index, NumDims> m_strides; | ||||
|   TensorEvaluator<ArgType, Device> m_impl; | ||||
|   ReverseDimensions m_reverse; | ||||
| }; | ||||
|  | ||||
| // Eval as lvalue | ||||
|  | ||||
| template <typename ReverseDimensions, typename ArgType, typename Device> | ||||
| struct TensorEvaluator<TensorReverseOp<ReverseDimensions, ArgType>, Device> | ||||
|     : public TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, | ||||
|                              Device> { | ||||
|   typedef TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, | ||||
|                           Device> Base; | ||||
|   typedef TensorReverseOp<ReverseDimensions, ArgType> XprType; | ||||
|   typedef typename XprType::Index Index; | ||||
|   static const int NumDims = internal::array_size<ReverseDimensions>::value; | ||||
|   typedef DSizes<Index, NumDims> Dimensions; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = false, | ||||
|     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, | ||||
|     Layout = TensorEvaluator<ArgType, Device>::Layout, | ||||
|     CoordAccess = false,  // to be implemented | ||||
|     RawAccess = false | ||||
|   }; | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, | ||||
|                                                         const Device& device) | ||||
|       : Base(op, device) {} | ||||
|  | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   const Dimensions& dimensions() const { return this->m_dimensions; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { | ||||
|     return this->m_impl.coeffRef(this->reverseIndex(index)); | ||||
|   } | ||||
|  | ||||
|   template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   void writePacket(Index index, const PacketReturnType& x) { | ||||
|     EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); | ||||
|  | ||||
|     // This code is pilfered from TensorMorphing.h | ||||
|     EIGEN_ALIGN_MAX CoeffReturnType values[PacketSize]; | ||||
|     internal::pstore<CoeffReturnType, PacketReturnType>(values, x); | ||||
|     for (int i = 0; i < PacketSize; ++i) { | ||||
|       this->coeffRef(index+i) = values[i]; | ||||
|     } | ||||
|   } | ||||
|  | ||||
| }; | ||||
|  | ||||
|  | ||||
| }  // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H | ||||
							
								
								
									
287 external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h (vendored, Normal file)
							| @@ -0,0 +1,287 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2016 Igor Babuschkin <igor@babuschk.in> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_SCAN_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_SCAN_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| namespace internal { | ||||
|  | ||||
| template <typename Op, typename XprType> | ||||
| struct traits<TensorScanOp<Op, XprType> > | ||||
|     : public traits<XprType> { | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef traits<XprType> XprTraits; | ||||
|   typedef typename XprTraits::StorageKind StorageKind; | ||||
|   typedef typename XprType::Nested Nested; | ||||
|   typedef typename remove_reference<Nested>::type _Nested; | ||||
|   static const int NumDimensions = XprTraits::NumDimensions; | ||||
|   static const int Layout = XprTraits::Layout; | ||||
| }; | ||||
|  | ||||
| template<typename Op, typename XprType> | ||||
| struct eval<TensorScanOp<Op, XprType>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorScanOp<Op, XprType>& type; | ||||
| }; | ||||
|  | ||||
| template<typename Op, typename XprType> | ||||
| struct nested<TensorScanOp<Op, XprType>, 1, | ||||
|             typename eval<TensorScanOp<Op, XprType> >::type> | ||||
| { | ||||
|   typedef TensorScanOp<Op, XprType> type; | ||||
| }; | ||||
| } // end namespace internal | ||||
|  | ||||
| /** \class TensorScan | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Tensor scan class. | ||||
|   */ | ||||
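| // Usage sketch (illustrative; the tensor name is a placeholder): TensorScanOp is normally created via | ||||
| // TensorBase::cumsum() / cumprod(). For a 1-d tensor holding {1, 2, 3}, an inclusive cumsum along axis 0 | ||||
| // yields {1, 3, 6}; passing exclusive == true shifts the result to {0, 1, 3}, i.e. the sum of the strictly | ||||
| // preceding elements, starting from the reducer's identity. | ||||
| // | ||||
| //   Eigen::Tensor<float, 1> v(3); | ||||
| //   v.setValues({1.f, 2.f, 3.f}); | ||||
| //   Eigen::Tensor<float, 1> inclusive = v.cumsum(0); | ||||
| //   Eigen::Tensor<float, 1> exclusive = v.cumsum(0, /*exclusive=*/true); | ||||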
| template <typename Op, typename XprType> | ||||
| class TensorScanOp | ||||
|     : public TensorBase<TensorScanOp<Op, XprType>, ReadOnlyAccessors> { | ||||
| public: | ||||
|   typedef typename Eigen::internal::traits<TensorScanOp>::Scalar Scalar; | ||||
|   typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename Eigen::internal::nested<TensorScanOp>::type Nested; | ||||
|   typedef typename Eigen::internal::traits<TensorScanOp>::StorageKind StorageKind; | ||||
|   typedef typename Eigen::internal::traits<TensorScanOp>::Index Index; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorScanOp( | ||||
|       const XprType& expr, const Index& axis, bool exclusive = false, const Op& op = Op()) | ||||
|       : m_expr(expr), m_axis(axis), m_accumulator(op), m_exclusive(exclusive) {} | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   const Index axis() const { return m_axis; } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   const XprType& expression() const { return m_expr; } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   const Op accumulator() const { return m_accumulator; } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   bool exclusive() const { return m_exclusive; } | ||||
|  | ||||
| protected: | ||||
|   typename XprType::Nested m_expr; | ||||
|   const Index m_axis; | ||||
|   const Op m_accumulator; | ||||
|   const bool m_exclusive; | ||||
| }; | ||||
|  | ||||
| template <typename Self, typename Reducer, typename Device> | ||||
| struct ScanLauncher; | ||||
|  | ||||
| // Eval as rvalue | ||||
| template <typename Op, typename ArgType, typename Device> | ||||
| struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> { | ||||
|  | ||||
|   typedef TensorScanOp<Op, ArgType> XprType; | ||||
|   typedef typename XprType::Index Index; | ||||
|   static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; | ||||
|   typedef DSizes<Index, NumDims> Dimensions; | ||||
|   typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   typedef TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> Self; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = false, | ||||
|     PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1), | ||||
|     BlockAccess = false, | ||||
|     Layout = TensorEvaluator<ArgType, Device>::Layout, | ||||
|     CoordAccess = false, | ||||
|     RawAccess = true | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, | ||||
|                                                         const Device& device) | ||||
|       : m_impl(op.expression(), device), | ||||
|         m_device(device), | ||||
|         m_exclusive(op.exclusive()), | ||||
|         m_accumulator(op.accumulator()), | ||||
|         m_size(m_impl.dimensions()[op.axis()]), | ||||
|         m_stride(1), | ||||
|         m_output(NULL) { | ||||
|  | ||||
|     // Accumulating a scalar isn't supported. | ||||
|     EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|     eigen_assert(op.axis() >= 0 && op.axis() < NumDims); | ||||
|  | ||||
|     // Compute stride of scan axis | ||||
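|     // e.g. (illustrative): for a ColMajor tensor with dims (4, 5, 6) scanned along axis 1, the loop below | ||||
|     // yields m_stride == 4, so consecutive entries along the scan axis are 4 coefficients apart; m_size == 5. | ||||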
|     const Dimensions& dims = m_impl.dimensions(); | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       for (int i = 0; i < op.axis(); ++i) { | ||||
|         m_stride = m_stride * dims[i]; | ||||
|       } | ||||
|     } else { | ||||
|       for (int i = NumDims - 1; i > op.axis(); --i) { | ||||
|         m_stride = m_stride * dims[i]; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { | ||||
|     return m_impl.dimensions(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index& stride() const { | ||||
|     return m_stride; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index& size() const { | ||||
|     return m_size; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Op& accumulator() const { | ||||
|     return m_accumulator; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool exclusive() const { | ||||
|     return m_exclusive; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& inner() const { | ||||
|     return m_impl; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const { | ||||
|     return m_device; | ||||
|   } | ||||
|  | ||||
|   EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) { | ||||
|     m_impl.evalSubExprsIfNeeded(NULL); | ||||
|     ScanLauncher<Self, Op, Device> launcher; | ||||
|     if (data) { | ||||
|       launcher(*this, data); | ||||
|       return false; | ||||
|     } | ||||
|  | ||||
|     const Index total_size = internal::array_prod(dimensions()); | ||||
|     m_output = static_cast<CoeffReturnType*>(m_device.allocate(total_size * sizeof(Scalar))); | ||||
|     launcher(*this, m_output); | ||||
|     return true; | ||||
|   } | ||||
|  | ||||
|   template<int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const { | ||||
|     return internal::ploadt<PacketReturnType, LoadMode>(m_output + index); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const | ||||
|   { | ||||
|     return m_output; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const | ||||
|   { | ||||
|     return m_output[index]; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool) const { | ||||
|     return TensorOpCost(sizeof(CoeffReturnType), 0, 0); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { | ||||
|     if (m_output != NULL) { | ||||
|       m_device.deallocate(m_output); | ||||
|       m_output = NULL; | ||||
|     } | ||||
|     m_impl.cleanup(); | ||||
|   } | ||||
|  | ||||
| protected: | ||||
|   TensorEvaluator<ArgType, Device> m_impl; | ||||
|   const Device& m_device; | ||||
|   const bool m_exclusive; | ||||
|   Op m_accumulator; | ||||
|   const Index m_size; | ||||
|   Index m_stride; | ||||
|   CoeffReturnType* m_output; | ||||
| }; | ||||
|  | ||||
| // CPU implementation of scan | ||||
| // TODO(ibab) This single-threaded implementation should be parallelized, | ||||
| // at least by running multiple scans at the same time. | ||||
| template <typename Self, typename Reducer, typename Device> | ||||
| struct ScanLauncher { | ||||
|   void operator()(Self& self, typename Self::CoeffReturnType *data) { | ||||
|     Index total_size = internal::array_prod(self.dimensions()); | ||||
|  | ||||
|     // We fix the index along the scan axis to 0 and perform a | ||||
|     // scan per remaining entry. The iteration is split into two nested | ||||
|     // loops to avoid an integer division by keeping track of each idx1 and idx2. | ||||
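|     // Concretely (illustrative): for a ColMajor tensor with dims (d0, d1, d2) scanned along axis 1, | ||||
|     // stride() == d0 and size() == d1, so idx1 steps over the d2 outer slices in chunks of d0 * d1, | ||||
|     // idx2 walks the d0 positions inside a slice, and idx3 below walks the scan axis itself. | ||||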
|     for (Index idx1 = 0; idx1 < total_size; idx1 += self.stride() * self.size()) { | ||||
|       for (Index idx2 = 0; idx2 < self.stride(); idx2++) { | ||||
|         // Calculate the starting offset for the scan | ||||
|         Index offset = idx1 + idx2; | ||||
|  | ||||
|         // Compute the scan along the axis, starting at the calculated offset | ||||
|         typename Self::CoeffReturnType accum = self.accumulator().initialize(); | ||||
|         for (Index idx3 = 0; idx3 < self.size(); idx3++) { | ||||
|           Index curr = offset + idx3 * self.stride(); | ||||
|  | ||||
|           if (self.exclusive()) { | ||||
|             data[curr] = self.accumulator().finalize(accum); | ||||
|             self.accumulator().reduce(self.inner().coeff(curr), &accum); | ||||
|           } else { | ||||
|             self.accumulator().reduce(self.inner().coeff(curr), &accum); | ||||
|             data[curr] = self.accumulator().finalize(accum); | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| }; | ||||
|  | ||||
| #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) | ||||
|  | ||||
| // GPU implementation of scan | ||||
| // TODO(ibab) This placeholder implementation performs multiple scans in | ||||
| // parallel, but it would be better to use a parallel scan algorithm and | ||||
| // optimize memory access. | ||||
| template <typename Self, typename Reducer> | ||||
| __global__ void ScanKernel(Self self, Index total_size, typename Self::CoeffReturnType* data) { | ||||
|   // Compute offset as in the CPU version | ||||
|   Index val = threadIdx.x + blockIdx.x * blockDim.x; | ||||
|   Index offset = (val / self.stride()) * self.stride() * self.size() + val % self.stride(); | ||||
|  | ||||
|   if (offset + (self.size() - 1) * self.stride() < total_size) { | ||||
|     // Compute the scan along the axis, starting at the calculated offset | ||||
|     typename Self::CoeffReturnType accum = self.accumulator().initialize(); | ||||
|     for (Index idx = 0; idx < self.size(); idx++) { | ||||
|       Index curr = offset + idx * self.stride(); | ||||
|       if (self.exclusive()) { | ||||
|         data[curr] = self.accumulator().finalize(accum); | ||||
|         self.accumulator().reduce(self.inner().coeff(curr), &accum); | ||||
|       } else { | ||||
|         self.accumulator().reduce(self.inner().coeff(curr), &accum); | ||||
|         data[curr] = self.accumulator().finalize(accum); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   __syncthreads(); | ||||
|  | ||||
| } | ||||
|  | ||||
| template <typename Self, typename Reducer> | ||||
| struct ScanLauncher<Self, Reducer, GpuDevice> { | ||||
|   void operator()(const Self& self, typename Self::CoeffReturnType* data) { | ||||
|      Index total_size = internal::array_prod(self.dimensions()); | ||||
|      Index num_blocks = (total_size / self.size() + 63) / 64; | ||||
|      Index block_size = 64; | ||||
|      LAUNCH_CUDA_KERNEL((ScanKernel<Self, Reducer>), num_blocks, block_size, 0, self.device(), self, total_size, data); | ||||
|   } | ||||
| }; | ||||
| #endif  // EIGEN_USE_GPU && __CUDACC__ | ||||
|  | ||||
| }  // end namespace Eigen | ||||
|  | ||||
| #endif  // EIGEN_CXX11_TENSOR_TENSOR_SCAN_H | ||||
							
								
								
									
264 external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h (vendored, Normal file)
							| @@ -0,0 +1,264 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \class TensorShuffling | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Tensor shuffling class. | ||||
|   * | ||||
|   * | ||||
|   */ | ||||
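| // Usage sketch (illustrative; names are placeholders): TensorShufflingOp is normally created through | ||||
| // TensorBase::shuffle(), which permutes the order of the dimensions. | ||||
| // | ||||
| //   Eigen::Tensor<float, 3> t(2, 3, 4); | ||||
| //   t.setRandom(); | ||||
| //   Eigen::array<int, 3> perm{{1, 2, 0}}; | ||||
| //   Eigen::Tensor<float, 3> s = t.shuffle(perm);  // s has dimensions (3, 4, 2), and s(i, j, k) == t(k, i, j) | ||||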
| namespace internal { | ||||
| template<typename Shuffle, typename XprType> | ||||
| struct traits<TensorShufflingOp<Shuffle, XprType> > : public traits<XprType> | ||||
| { | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef traits<XprType> XprTraits; | ||||
|   typedef typename XprTraits::StorageKind StorageKind; | ||||
|   typedef typename XprTraits::Index Index; | ||||
|   typedef typename XprType::Nested Nested; | ||||
|   typedef typename remove_reference<Nested>::type _Nested; | ||||
|   static const int NumDimensions = XprTraits::NumDimensions; | ||||
|   static const int Layout = XprTraits::Layout; | ||||
| }; | ||||
|  | ||||
| template<typename Shuffle, typename XprType> | ||||
| struct eval<TensorShufflingOp<Shuffle, XprType>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorShufflingOp<Shuffle, XprType>& type; | ||||
| }; | ||||
|  | ||||
| template<typename Shuffle, typename XprType> | ||||
| struct nested<TensorShufflingOp<Shuffle, XprType>, 1, typename eval<TensorShufflingOp<Shuffle, XprType> >::type> | ||||
| { | ||||
|   typedef TensorShufflingOp<Shuffle, XprType> type; | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
|  | ||||
|  | ||||
| template<typename Shuffle, typename XprType> | ||||
| class TensorShufflingOp : public TensorBase<TensorShufflingOp<Shuffle, XprType> > | ||||
| { | ||||
|   public: | ||||
|   typedef typename Eigen::internal::traits<TensorShufflingOp>::Scalar Scalar; | ||||
|   typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename Eigen::internal::nested<TensorShufflingOp>::type Nested; | ||||
|   typedef typename Eigen::internal::traits<TensorShufflingOp>::StorageKind StorageKind; | ||||
|   typedef typename Eigen::internal::traits<TensorShufflingOp>::Index Index; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorShufflingOp(const XprType& expr, const Shuffle& shuffle) | ||||
|       : m_xpr(expr), m_shuffle(shuffle) {} | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const Shuffle& shufflePermutation() const { return m_shuffle; } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const typename internal::remove_all<typename XprType::Nested>::type& | ||||
|     expression() const { return m_xpr; } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE TensorShufflingOp& operator = (const TensorShufflingOp& other) | ||||
|     { | ||||
|       typedef TensorAssignOp<TensorShufflingOp, const TensorShufflingOp> Assign; | ||||
|       Assign assign(*this, other); | ||||
|       internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); | ||||
|       return *this; | ||||
|     } | ||||
|  | ||||
|     template<typename OtherDerived> | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE TensorShufflingOp& operator = (const OtherDerived& other) | ||||
|     { | ||||
|       typedef TensorAssignOp<TensorShufflingOp, const OtherDerived> Assign; | ||||
|       Assign assign(*this, other); | ||||
|       internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); | ||||
|       return *this; | ||||
|     } | ||||
|  | ||||
|   protected: | ||||
|     typename XprType::Nested m_xpr; | ||||
|     const Shuffle m_shuffle; | ||||
| }; | ||||
|  | ||||
|  | ||||
| // Eval as rvalue | ||||
| template<typename Shuffle, typename ArgType, typename Device> | ||||
| struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device> | ||||
| { | ||||
|   typedef TensorShufflingOp<Shuffle, ArgType> XprType; | ||||
|   typedef typename XprType::Index Index; | ||||
|   static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; | ||||
|   typedef DSizes<Index, NumDims> Dimensions; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = false, | ||||
|     PacketAccess = (internal::packet_traits<Scalar>::size > 1), | ||||
|     Layout = TensorEvaluator<ArgType, Device>::Layout, | ||||
|     CoordAccess = false,  // to be implemented | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) | ||||
|       : m_impl(op.expression(), device) | ||||
|   { | ||||
|     const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); | ||||
|     const Shuffle& shuffle = op.shufflePermutation(); | ||||
|     for (int i = 0; i < NumDims; ++i) { | ||||
|       m_dimensions[i] = input_dims[shuffle[i]]; | ||||
|     } | ||||
|  | ||||
|     array<Index, NumDims> inputStrides; | ||||
|  | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       inputStrides[0] = 1; | ||||
|       m_outputStrides[0] = 1; | ||||
|       for (int i = 1; i < NumDims; ++i) { | ||||
|         inputStrides[i] = inputStrides[i - 1] * input_dims[i - 1]; | ||||
|         m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1]; | ||||
|       } | ||||
|     } else { | ||||
|       inputStrides[NumDims - 1] = 1; | ||||
|       m_outputStrides[NumDims - 1] = 1; | ||||
|       for (int i = NumDims - 2; i >= 0; --i) { | ||||
|         inputStrides[i] = inputStrides[i + 1] * input_dims[i + 1]; | ||||
|         m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1]; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     for (int i = 0; i < NumDims; ++i) { | ||||
|       m_inputStrides[i] = inputStrides[shuffle[i]]; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { | ||||
|     m_impl.evalSubExprsIfNeeded(NULL); | ||||
|     return true; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { | ||||
|     m_impl.cleanup(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const | ||||
|   { | ||||
|     return m_impl.coeff(srcCoeff(index)); | ||||
|   } | ||||
|  | ||||
|   template<int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const | ||||
|   { | ||||
|     EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); | ||||
|  | ||||
|     EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; | ||||
|     for (int i = 0; i < PacketSize; ++i) { | ||||
|       values[i] = coeff(index+i); | ||||
|     } | ||||
|     PacketReturnType rslt = internal::pload<PacketReturnType>(values); | ||||
|     return rslt; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { | ||||
|     const double compute_cost = NumDims * (2 * TensorOpCost::AddCost<Index>() + | ||||
|                                            2 * TensorOpCost::MulCost<Index>() + | ||||
|                                            TensorOpCost::DivCost<Index>()); | ||||
|     return m_impl.costPerCoeff(vectorized) + | ||||
|            TensorOpCost(0, 0, compute_cost, false /* vectorized */, PacketSize); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } | ||||
|  | ||||
|  protected: | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const { | ||||
|     Index inputIndex = 0; | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       for (int i = NumDims - 1; i > 0; --i) { | ||||
|         const Index idx = index / m_outputStrides[i]; | ||||
|         inputIndex += idx * m_inputStrides[i]; | ||||
|         index -= idx * m_outputStrides[i]; | ||||
|       } | ||||
|       return inputIndex + index * m_inputStrides[0]; | ||||
|     } else { | ||||
|       for (int i = 0; i < NumDims - 1; ++i) { | ||||
|         const Index idx = index / m_outputStrides[i]; | ||||
|         inputIndex += idx * m_inputStrides[i]; | ||||
|         index -= idx * m_outputStrides[i]; | ||||
|       } | ||||
|       return inputIndex + index * m_inputStrides[NumDims - 1]; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   Dimensions m_dimensions; | ||||
|   array<Index, NumDims> m_outputStrides; | ||||
|   array<Index, NumDims> m_inputStrides; | ||||
|   TensorEvaluator<ArgType, Device> m_impl; | ||||
| }; | ||||
|  | ||||
|  | ||||
| // Eval as lvalue | ||||
| template<typename Shuffle, typename ArgType, typename Device> | ||||
| struct TensorEvaluator<TensorShufflingOp<Shuffle, ArgType>, Device> | ||||
|     : public TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device> | ||||
| { | ||||
|   typedef TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device> Base; | ||||
|  | ||||
|   typedef TensorShufflingOp<Shuffle, ArgType> XprType; | ||||
|   typedef typename XprType::Index Index; | ||||
|   static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; | ||||
|   typedef DSizes<Index, NumDims> Dimensions; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = false, | ||||
|     PacketAccess = (internal::packet_traits<Scalar>::size > 1), | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) | ||||
|       : Base(op, device) | ||||
|   { } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) | ||||
|   { | ||||
|     return this->m_impl.coeffRef(this->srcCoeff(index)); | ||||
|   } | ||||
|  | ||||
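|   // Shuffled coefficients are generally not contiguous in the input, so the | ||||
|   // packet is first stored to an aligned local buffer and then written back | ||||
|   // coefficient by coefficient through coeffRef(). | ||||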
|   template <int StoreMode> EIGEN_STRONG_INLINE | ||||
|   void writePacket(Index index, const PacketReturnType& x) | ||||
|   { | ||||
|     EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|  | ||||
|     EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; | ||||
|     internal::pstore<CoeffReturnType, PacketReturnType>(values, x); | ||||
|     for (int i = 0; i < PacketSize; ++i) { | ||||
|       this->coeffRef(index+i) = values[i]; | ||||
|     } | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H | ||||
							
								
								
									
146 external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h vendored Normal file
							| @@ -0,0 +1,146 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2013 Christian Seiler <christian@iwakd.de> | ||||
| // Copyright (C) 2014-2015 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSORSTORAGE_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSORSTORAGE_H | ||||
|  | ||||
| #ifdef EIGEN_TENSOR_STORAGE_CTOR_PLUGIN | ||||
|   #define EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN EIGEN_TENSOR_STORAGE_CTOR_PLUGIN; | ||||
| #else | ||||
|   #define EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN | ||||
| #endif | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \internal | ||||
|   * | ||||
|   * \class TensorStorage | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Stores the data of a tensor | ||||
|   * | ||||
|   * This class stores the data of fixed-size, dynamic-size or mixed tensors | ||||
|   * as compactly as possible. | ||||
|   * | ||||
|   * \sa Tensor | ||||
|   */ | ||||
| template<typename T, typename Dimensions, int Options> class TensorStorage; | ||||
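|  | ||||
| // Illustrative sketch of how the two specialisations below are selected by the | ||||
| // public Tensor classes of this module: | ||||
| //   Eigen::TensorFixedSize<float, Eigen::Sizes<2, 3> > a;  // fixed-size storage | ||||
| //   Eigen::Tensor<float, 2> b(2, 3);                        // heap-allocated DSizes storage | ||||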
|  | ||||
|  | ||||
| // Pure fixed-size storage | ||||
| template<typename T, typename FixedDimensions, int Options_> | ||||
| class TensorStorage | ||||
| { | ||||
|  private: | ||||
|   static const std::size_t Size = FixedDimensions::total_size; | ||||
|  | ||||
|   // Allocate an array of size at least one to prevent compiler warnings. | ||||
|   static const std::size_t MinSize = max_n_1<Size>::size; | ||||
|   EIGEN_ALIGN_MAX T m_data[MinSize]; | ||||
|  | ||||
|   FixedDimensions m_dimensions; | ||||
|  | ||||
|  public: | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE TensorStorage() { | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE T *data() { return m_data; } | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE const T *data() const { return m_data; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE const FixedDimensions& dimensions() const { return m_dimensions; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE DenseIndex size() const { return m_dimensions.TotalSize(); } | ||||
| }; | ||||
|  | ||||
|  | ||||
| // pure dynamic | ||||
| template<typename T, typename IndexType, int NumIndices_, int Options_> | ||||
| class TensorStorage<T, DSizes<IndexType, NumIndices_>, Options_> | ||||
| { | ||||
|   public: | ||||
|     typedef IndexType Index; | ||||
|     typedef DSizes<IndexType, NumIndices_> Dimensions; | ||||
|     typedef TensorStorage<T, DSizes<IndexType, NumIndices_>, Options_> Self; | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC TensorStorage() : m_data(0), m_dimensions() { | ||||
|       if (NumIndices_ == 0) { | ||||
|         m_data = internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(1); | ||||
|       } | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC TensorStorage(internal::constructor_without_unaligned_array_assert) | ||||
|       : m_data(0), m_dimensions(internal::template repeat<NumIndices_, Index>(0)) {} | ||||
|     EIGEN_DEVICE_FUNC TensorStorage(Index size, const array<Index, NumIndices_>& dimensions) | ||||
|         : m_data(internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(size)), m_dimensions(dimensions) | ||||
|       { EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN } | ||||
|  | ||||
| #if EIGEN_HAS_VARIADIC_TEMPLATES | ||||
|     template <typename... DenseIndex> | ||||
|     EIGEN_DEVICE_FUNC TensorStorage(DenseIndex... indices) : m_dimensions(indices...) { | ||||
|       m_data = internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(internal::array_prod(m_dimensions)); | ||||
|     } | ||||
| #endif | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC TensorStorage(const Self& other) | ||||
|       : m_data(internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(internal::array_prod(other.m_dimensions))) | ||||
|       , m_dimensions(other.m_dimensions) | ||||
|     { | ||||
|       internal::smart_copy(other.m_data, other.m_data+internal::array_prod(other.m_dimensions), m_data); | ||||
|     } | ||||
|     EIGEN_DEVICE_FUNC Self& operator=(const Self& other) | ||||
|     { | ||||
|       if (this != &other) { | ||||
|         Self tmp(other); | ||||
|         this->swap(tmp); | ||||
|       } | ||||
|       return *this; | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC  ~TensorStorage() { internal::conditional_aligned_delete_auto<T,(Options_&DontAlign)==0>(m_data, internal::array_prod(m_dimensions)); } | ||||
|     EIGEN_DEVICE_FUNC  void swap(Self& other) | ||||
|     { numext::swap(m_data,other.m_data); numext::swap(m_dimensions,other.m_dimensions); } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {return m_dimensions;} | ||||
|  | ||||
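|     // Note: when the total size changes, the old buffer is freed and a fresh | ||||
|     // one is allocated, so existing coefficients are not preserved by resize(). | ||||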
|     EIGEN_DEVICE_FUNC void resize(Index size, const array<Index, NumIndices_>& nbDimensions) | ||||
|     { | ||||
|       const Index currentSz = internal::array_prod(m_dimensions); | ||||
|       if(size != currentSz) | ||||
|       { | ||||
|         internal::conditional_aligned_delete_auto<T,(Options_&DontAlign)==0>(m_data, currentSz); | ||||
|         if (size) | ||||
|           m_data = internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(size); | ||||
|         else if (NumIndices_ == 0) { | ||||
|           m_data = internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(1); | ||||
|         } | ||||
|         else | ||||
|           m_data = 0; | ||||
|         EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) | ||||
|       } | ||||
|       m_dimensions = nbDimensions; | ||||
|     } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T *data() { return m_data; } | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T *data() const { return m_data; } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_dimensions.TotalSize(); } | ||||
|  | ||||
|  private: | ||||
|   T *m_data; | ||||
|   Dimensions m_dimensions; | ||||
| }; | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSORSTORAGE_H | ||||
							
								
								
									
338 external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h vendored Normal file
							| @@ -0,0 +1,338 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \class TensorStriding | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Tensor striding class. | ||||
|   * | ||||
|   * Samples the input tensor with a fixed stride along each dimension, so the | ||||
|   * result keeps only every strides[i]-th coefficient of dimension i and has | ||||
|   * dimensions ceil(input_dim[i] / strides[i]). | ||||
|   * | ||||
|   */ | ||||
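|  | ||||
| // A brief usage sketch (assuming the TensorBase::stride() entry point provided | ||||
| // elsewhere in this module): | ||||
| //   Eigen::Tensor<float, 2> in(8, 6); | ||||
| //   Eigen::array<Eigen::DenseIndex, 2> strides{{2, 3}}; | ||||
| //   Eigen::Tensor<float, 2> out = in.stride(strides);  // out is 4 x 2 | ||||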
| namespace internal { | ||||
| template<typename Strides, typename XprType> | ||||
| struct traits<TensorStridingOp<Strides, XprType> > : public traits<XprType> | ||||
| { | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef traits<XprType> XprTraits; | ||||
|   typedef typename XprTraits::StorageKind StorageKind; | ||||
|   typedef typename XprTraits::Index Index; | ||||
|   typedef typename XprType::Nested Nested; | ||||
|   typedef typename remove_reference<Nested>::type _Nested; | ||||
|   static const int NumDimensions = XprTraits::NumDimensions; | ||||
|   static const int Layout = XprTraits::Layout; | ||||
| }; | ||||
|  | ||||
| template<typename Strides, typename XprType> | ||||
| struct eval<TensorStridingOp<Strides, XprType>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorStridingOp<Strides, XprType>& type; | ||||
| }; | ||||
|  | ||||
| template<typename Strides, typename XprType> | ||||
| struct nested<TensorStridingOp<Strides, XprType>, 1, typename eval<TensorStridingOp<Strides, XprType> >::type> | ||||
| { | ||||
|   typedef TensorStridingOp<Strides, XprType> type; | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
|  | ||||
|  | ||||
| template<typename Strides, typename XprType> | ||||
| class TensorStridingOp : public TensorBase<TensorStridingOp<Strides, XprType> > | ||||
| { | ||||
|   public: | ||||
|   typedef typename Eigen::internal::traits<TensorStridingOp>::Scalar Scalar; | ||||
|   typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename Eigen::internal::nested<TensorStridingOp>::type Nested; | ||||
|   typedef typename Eigen::internal::traits<TensorStridingOp>::StorageKind StorageKind; | ||||
|   typedef typename Eigen::internal::traits<TensorStridingOp>::Index Index; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingOp(const XprType& expr, const Strides& dims) | ||||
|       : m_xpr(expr), m_dims(dims) {} | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const Strides& strides() const { return m_dims; } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const typename internal::remove_all<typename XprType::Nested>::type& | ||||
|     expression() const { return m_xpr; } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE TensorStridingOp& operator = (const TensorStridingOp& other) | ||||
|     { | ||||
|       typedef TensorAssignOp<TensorStridingOp, const TensorStridingOp> Assign; | ||||
|       Assign assign(*this, other); | ||||
|       internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); | ||||
|       return *this; | ||||
|     } | ||||
|  | ||||
|     template<typename OtherDerived> | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     EIGEN_STRONG_INLINE TensorStridingOp& operator = (const OtherDerived& other) | ||||
|     { | ||||
|       typedef TensorAssignOp<TensorStridingOp, const OtherDerived> Assign; | ||||
|       Assign assign(*this, other); | ||||
|       internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); | ||||
|       return *this; | ||||
|     } | ||||
|  | ||||
|   protected: | ||||
|     typename XprType::Nested m_xpr; | ||||
|     const Strides m_dims; | ||||
| }; | ||||
|  | ||||
|  | ||||
| // Eval as rvalue | ||||
| template<typename Strides, typename ArgType, typename Device> | ||||
| struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device> | ||||
| { | ||||
|   typedef TensorStridingOp<Strides, ArgType> XprType; | ||||
|   typedef typename XprType::Index Index; | ||||
|   static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; | ||||
|   typedef DSizes<Index, NumDims> Dimensions; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false, | ||||
|     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, | ||||
|     Layout = TensorEvaluator<ArgType, Device>::Layout, | ||||
|     CoordAccess = false,  // to be implemented | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) | ||||
|       : m_impl(op.expression(), device) | ||||
|   { | ||||
|     m_dimensions = m_impl.dimensions(); | ||||
|     for (int i = 0; i < NumDims; ++i) { | ||||
|       m_dimensions[i] = ceilf(static_cast<float>(m_dimensions[i]) / op.strides()[i]); | ||||
|     } | ||||
|  | ||||
|     const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       m_outputStrides[0] = 1; | ||||
|       m_inputStrides[0] = 1; | ||||
|       for (int i = 1; i < NumDims; ++i) { | ||||
|         m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; | ||||
|         m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; | ||||
|         m_inputStrides[i-1] *= op.strides()[i-1]; | ||||
|       } | ||||
|       m_inputStrides[NumDims-1] *= op.strides()[NumDims-1]; | ||||
|     } else {  // RowMajor | ||||
|       m_outputStrides[NumDims-1] = 1; | ||||
|       m_inputStrides[NumDims-1] = 1; | ||||
|       for (int i = NumDims - 2; i >= 0; --i) { | ||||
|         m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1]; | ||||
|         m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; | ||||
|         m_inputStrides[i+1] *= op.strides()[i+1]; | ||||
|       } | ||||
|       m_inputStrides[0] *= op.strides()[0]; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { | ||||
|     m_impl.evalSubExprsIfNeeded(NULL); | ||||
|     return true; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { | ||||
|     m_impl.cleanup(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const | ||||
|   { | ||||
|     return m_impl.coeff(srcCoeff(index)); | ||||
|   } | ||||
|  | ||||
|   template<int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const | ||||
|   { | ||||
|     EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); | ||||
|  | ||||
|     Index inputIndices[] = {0, 0}; | ||||
|     Index indices[] = {index, index + PacketSize - 1}; | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       for (int i = NumDims - 1; i > 0; --i) { | ||||
|         const Index idx0 = indices[0] / m_outputStrides[i]; | ||||
|         const Index idx1 = indices[1] / m_outputStrides[i]; | ||||
|         inputIndices[0] += idx0 * m_inputStrides[i]; | ||||
|         inputIndices[1] += idx1 * m_inputStrides[i]; | ||||
|         indices[0] -= idx0 * m_outputStrides[i]; | ||||
|         indices[1] -= idx1 * m_outputStrides[i]; | ||||
|       } | ||||
|       inputIndices[0] += indices[0] * m_inputStrides[0]; | ||||
|       inputIndices[1] += indices[1] * m_inputStrides[0]; | ||||
|     } else {  // RowMajor | ||||
|       for (int i = 0; i < NumDims - 1; ++i) { | ||||
|         const Index idx0 = indices[0] / m_outputStrides[i]; | ||||
|         const Index idx1 = indices[1] / m_outputStrides[i]; | ||||
|         inputIndices[0] += idx0 * m_inputStrides[i]; | ||||
|         inputIndices[1] += idx1 * m_inputStrides[i]; | ||||
|         indices[0] -= idx0 * m_outputStrides[i]; | ||||
|         indices[1] -= idx1 * m_outputStrides[i]; | ||||
|       } | ||||
|       inputIndices[0] += indices[0] * m_inputStrides[NumDims-1]; | ||||
|       inputIndices[1] += indices[1] * m_inputStrides[NumDims-1]; | ||||
|     } | ||||
|     if (inputIndices[1] - inputIndices[0] == PacketSize - 1) { | ||||
|       PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]); | ||||
|       return rslt; | ||||
|     } | ||||
|     else { | ||||
|       EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; | ||||
|       values[0] = m_impl.coeff(inputIndices[0]); | ||||
|       values[PacketSize-1] = m_impl.coeff(inputIndices[1]); | ||||
|       for (int i = 1; i < PacketSize-1; ++i) { | ||||
|         values[i] = coeff(index+i); | ||||
|       } | ||||
|       PacketReturnType rslt = internal::pload<PacketReturnType>(values); | ||||
|       return rslt; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { | ||||
|     double compute_cost = (NumDims - 1) * (TensorOpCost::AddCost<Index>() + | ||||
|                                            TensorOpCost::MulCost<Index>() + | ||||
|                                            TensorOpCost::DivCost<Index>()) + | ||||
|         TensorOpCost::MulCost<Index>(); | ||||
|     if (vectorized) { | ||||
|       compute_cost *= 2;  // packet() computes two indices | ||||
|     } | ||||
|     const int innerDim = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? 0 : (NumDims - 1); | ||||
|     return m_impl.costPerCoeff(vectorized && m_inputStrides[innerDim] == 1) + | ||||
|         // Computation is not vectorized per se, but it is done once per packet. | ||||
|         TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } | ||||
|  | ||||
|  protected: | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const | ||||
|   { | ||||
|     Index inputIndex = 0; | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       for (int i = NumDims - 1; i > 0; --i) { | ||||
|         const Index idx = index / m_outputStrides[i]; | ||||
|         inputIndex += idx * m_inputStrides[i]; | ||||
|         index -= idx * m_outputStrides[i]; | ||||
|       } | ||||
|       inputIndex += index * m_inputStrides[0]; | ||||
|     } else {  // RowMajor | ||||
|       for (int i = 0; i < NumDims - 1; ++i) { | ||||
|         const Index idx = index / m_outputStrides[i]; | ||||
|         inputIndex += idx * m_inputStrides[i]; | ||||
|         index -= idx * m_outputStrides[i]; | ||||
|       } | ||||
|       inputIndex += index * m_inputStrides[NumDims-1]; | ||||
|     } | ||||
|     return inputIndex; | ||||
|   } | ||||
|  | ||||
|   Dimensions m_dimensions; | ||||
|   array<Index, NumDims> m_outputStrides; | ||||
|   array<Index, NumDims> m_inputStrides; | ||||
|   TensorEvaluator<ArgType, Device> m_impl; | ||||
| }; | ||||
|  | ||||
|  | ||||
| // Eval as lvalue | ||||
| template<typename Strides, typename ArgType, typename Device> | ||||
| struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device> | ||||
|     : public TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device> | ||||
| { | ||||
|   typedef TensorStridingOp<Strides, ArgType> XprType; | ||||
|   typedef TensorEvaluator<const XprType, Device> Base; | ||||
|   //  typedef typename XprType::Index Index; | ||||
|   static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; | ||||
|   //  typedef DSizes<Index, NumDims> Dimensions; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false, | ||||
|     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, | ||||
|     Layout = TensorEvaluator<ArgType, Device>::Layout, | ||||
|     CoordAccess = false,  // to be implemented | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) | ||||
|       : Base(op, device) { } | ||||
|  | ||||
|   typedef typename XprType::Index Index; | ||||
|   typedef typename XprType::Scalar Scalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) | ||||
|   { | ||||
|     return this->m_impl.coeffRef(this->srcCoeff(index)); | ||||
|   } | ||||
|  | ||||
|   template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   void writePacket(Index index, const PacketReturnType& x) | ||||
|   { | ||||
|     EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     eigen_assert(index+PacketSize-1 < this->dimensions().TotalSize()); | ||||
|  | ||||
|     Index inputIndices[] = {0, 0}; | ||||
|     Index indices[] = {index, index + PacketSize - 1}; | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       for (int i = NumDims - 1; i > 0; --i) { | ||||
|         const Index idx0 = indices[0] / this->m_outputStrides[i]; | ||||
|         const Index idx1 = indices[1] / this->m_outputStrides[i]; | ||||
|         inputIndices[0] += idx0 * this->m_inputStrides[i]; | ||||
|         inputIndices[1] += idx1 * this->m_inputStrides[i]; | ||||
|         indices[0] -= idx0 * this->m_outputStrides[i]; | ||||
|         indices[1] -= idx1 * this->m_outputStrides[i]; | ||||
|       } | ||||
|       inputIndices[0] += indices[0] * this->m_inputStrides[0]; | ||||
|       inputIndices[1] += indices[1] * this->m_inputStrides[0]; | ||||
|     } else {  // RowMajor | ||||
|       for (int i = 0; i < NumDims - 1; ++i) { | ||||
|         const Index idx0 = indices[0] / this->m_outputStrides[i]; | ||||
|         const Index idx1 = indices[1] / this->m_outputStrides[i]; | ||||
|         inputIndices[0] += idx0 * this->m_inputStrides[i]; | ||||
|         inputIndices[1] += idx1 * this->m_inputStrides[i]; | ||||
|         indices[0] -= idx0 * this->m_outputStrides[i]; | ||||
|         indices[1] -= idx1 * this->m_outputStrides[i]; | ||||
|       } | ||||
|       inputIndices[0] += indices[0] * this->m_inputStrides[NumDims-1]; | ||||
|       inputIndices[1] += indices[1] * this->m_inputStrides[NumDims-1]; | ||||
|     } | ||||
|     if (inputIndices[1] - inputIndices[0] == PacketSize - 1) { | ||||
|       this->m_impl.template writePacket<Unaligned>(inputIndices[0], x); | ||||
|     } | ||||
|     else { | ||||
|       EIGEN_ALIGN_MAX Scalar values[PacketSize]; | ||||
|       internal::pstore<Scalar, PacketReturnType>(values, x); | ||||
|       this->m_impl.coeffRef(inputIndices[0]) = values[0]; | ||||
|       this->m_impl.coeffRef(inputIndices[1]) = values[PacketSize-1]; | ||||
|       for (int i = 1; i < PacketSize-1; ++i) { | ||||
|         this->coeffRef(index+i) = values[i]; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H | ||||
							
								
								
									
82 external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h vendored Normal file
							| @@ -0,0 +1,82 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Mehdi Goli    Codeplay Software Ltd. | ||||
| // Ralph Potter  Codeplay Software Ltd. | ||||
| // Luke Iwanski  Codeplay Software Ltd. | ||||
| // Contact: eigen@codeplay.com | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| // General include header of SYCL target for Tensor Module | ||||
| #ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_H | ||||
| #define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_H | ||||
|  | ||||
| #ifdef EIGEN_USE_SYCL | ||||
|  | ||||
| // global pointer type used to tag data with the global address-space attribute | ||||
| template <class T> | ||||
| struct MakeGlobalPointer { | ||||
|   typedef typename cl::sycl::global_ptr<T>::pointer_t Type; | ||||
| }; | ||||
|  | ||||
| // local pointer type used to tag data with the local (work-group) address-space attribute | ||||
| template <class T> | ||||
| struct MakeLocalPointer { | ||||
|   typedef typename cl::sycl::local_ptr<T>::pointer_t Type; | ||||
| }; | ||||
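|  | ||||
| // For example, MakeGlobalPointer<float>::Type is the raw pointer type | ||||
| // underlying cl::sycl::global_ptr<float>, i.e. a pointer into global device | ||||
| // memory, while MakeLocalPointer maps to local (work-group) memory. | ||||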
|  | ||||
|  | ||||
| namespace Eigen { | ||||
| namespace TensorSycl { | ||||
| namespace internal { | ||||
|  | ||||
| /// This struct is used for special expression nodes that carry no functor (for example TensorAssignOp and TensorSelectOp). | ||||
|   struct NoOP; | ||||
|  | ||||
| template<bool IsConst, typename T> struct GetType{ | ||||
|   typedef const T Type; | ||||
| }; | ||||
| template<typename T> struct GetType<false, T>{ | ||||
|   typedef T Type; | ||||
| }; | ||||
|  | ||||
| } // namespace internal | ||||
| } // namespace TensorSycl | ||||
| } // namespace Eigen | ||||
|  | ||||
| // tuple construction | ||||
| #include "TensorSyclTuple.h" | ||||
|  | ||||
| // counts the number of leaf nodes in the expression tree at compile time | ||||
| #include "TensorSyclLeafCount.h" | ||||
|  | ||||
| // The indexed PlaceHolder takes the actual expression and replaces the actual | ||||
| // data in it with a placeholder. It uses the same pre-order expression tree | ||||
| // traversal as the leaf count in order to assign the right accessor index to | ||||
| // each node in the expression. | ||||
| #include "TensorSyclPlaceHolderExpr.h" | ||||
|  | ||||
| // creation of an accessor tuple from a tuple of SYCL buffers | ||||
| #include "TensorSyclExtractAccessor.h" | ||||
|  | ||||
| // this is used to change the address space of the TensorMap pointers for the device | ||||
| #include "TensorSyclConvertToDeviceExpression.h" | ||||
|  | ||||
| // this is used to extract the functors | ||||
| #include "TensorSyclExtractFunctors.h" | ||||
|  | ||||
| // this is used to reconstruct the expression (and its TensorMaps) on the device | ||||
| #include "TensorSyclExprConstructor.h" | ||||
|  | ||||
| // this is used for evaluating tensor reductions on the SYCL device | ||||
| #include "TensorReductionSycl.h" | ||||
|  | ||||
| // kernel execution using fusion | ||||
| #include "TensorSyclRun.h" | ||||
|  | ||||
| #endif  // end of EIGEN_USE_SYCL | ||||
| #endif  // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_H | ||||
							
								
								
									
121 external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h vendored Normal file
							| @@ -0,0 +1,121 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Mehdi Goli    Codeplay Software Ltd. | ||||
| // Ralph Potter  Codeplay Software Ltd. | ||||
| // Luke Iwanski  Codeplay Software Ltd. | ||||
| // Contact: <eigen@codeplay.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| /***************************************************************** | ||||
|  * TensorSyclConvertToDeviceExpression.h | ||||
|  * | ||||
|  * \brief: | ||||
|  *  Conversion from host pointer to device pointer | ||||
|  *  inside leaf nodes of the expression. | ||||
|  * | ||||
| *****************************************************************/ | ||||
|  | ||||
| #ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_CONVERT_TO_DEVICE_EXPRESSION_HPP | ||||
| #define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_CONVERT_TO_DEVICE_EXPRESSION_HPP | ||||
|  | ||||
| namespace Eigen { | ||||
| namespace TensorSycl { | ||||
| namespace internal { | ||||
|  | ||||
| /// \struct ConvertToDeviceExpression | ||||
| /// \brief This struct converts the MakePointer in the host expression to | ||||
| /// MakeGlobalPointer for the device expression, for the leaf nodes that | ||||
| /// contain a pointer. This is needed because the address space of a T* | ||||
| /// pointer differs between the host and the device. | ||||
| template <typename Expr> | ||||
| struct ConvertToDeviceExpression; | ||||
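|  | ||||
| // For example, for a leaf of type | ||||
| //   const TensorMap<Tensor<float, 2>, Options, MakePointer> | ||||
| // the resulting ConvertToDeviceExpression<...>::Type is the same TensorMap | ||||
| // with MakePointer replaced by MakeGlobalPointer, so that its embedded | ||||
| // pointer lives in the device's global address space (Options stands for | ||||
| // whatever alignment option the host expression used). | ||||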
|  | ||||
| template<template<class...> class NonOpCategory, bool IsConst, typename... Args> | ||||
| struct NonOpConversion{ | ||||
|   typedef typename GetType<IsConst, NonOpCategory<typename ConvertToDeviceExpression<Args>::Type...> >::Type Type; | ||||
| }; | ||||
|  | ||||
|  | ||||
| template<template<class, template <class> class > class NonOpCategory, bool IsConst, typename Args> | ||||
| struct DeviceConvertor{ | ||||
|   typedef typename GetType<IsConst, NonOpCategory<typename ConvertToDeviceExpression<Args>::Type, MakeGlobalPointer> >::Type Type; | ||||
| }; | ||||
|  | ||||
| /// specialisation of the \ref ConvertToDeviceExpression struct when the node | ||||
| /// type is TensorMap | ||||
| #define TENSORMAPCONVERT(CVQual)\ | ||||
| template <typename Scalar_, int Options_, int Options2_, int NumIndices_, typename IndexType_, template <class> class MakePointer_>\ | ||||
| struct ConvertToDeviceExpression<CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakePointer_> > {\ | ||||
|   typedef CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakeGlobalPointer> Type;\ | ||||
| }; | ||||
|  | ||||
| TENSORMAPCONVERT(const) | ||||
| TENSORMAPCONVERT() | ||||
| #undef TENSORMAPCONVERT | ||||
|  | ||||
| /// specialisation of the \ref ConvertToDeviceExpression struct when the node | ||||
| /// type is TensorCwiseNullaryOp, TensorCwiseUnaryOp, TensorCwiseBinaryOp, TensorCwiseTernaryOp, TensorBroadcastingOp | ||||
| #define CATEGORYCONVERT(CVQual)\ | ||||
| template <template<class, class...> class Category, typename OP, typename... subExprs>\ | ||||
| struct ConvertToDeviceExpression<CVQual Category<OP, subExprs...> > {\ | ||||
|   typedef CVQual Category<OP, typename ConvertToDeviceExpression<subExprs>::Type... > Type;\ | ||||
| }; | ||||
| CATEGORYCONVERT(const) | ||||
| CATEGORYCONVERT() | ||||
| #undef CATEGORYCONVERT | ||||
|  | ||||
|  | ||||
| /// specialisation of the \ref ConvertToDeviceExpression struct when the node | ||||
| /// type is  TensorCwiseSelectOp | ||||
| #define SELECTOPCONVERT(CVQual, Res)\ | ||||
| template <typename IfExpr, typename ThenExpr, typename ElseExpr>\ | ||||
| struct ConvertToDeviceExpression<CVQual TensorSelectOp<IfExpr, ThenExpr, ElseExpr> >\ | ||||
| : NonOpConversion<TensorSelectOp, Res, IfExpr, ThenExpr, ElseExpr> {}; | ||||
| SELECTOPCONVERT(const, true) | ||||
| SELECTOPCONVERT(, false) | ||||
| #undef SELECTOPCONVERT | ||||
|  | ||||
| /// specialisation of the \ref ConvertToDeviceExpression struct when the node | ||||
| /// type is const TensorAssignOp | ||||
| #define ASSIGNCONVERT(CVQual, Res)\ | ||||
| template <typename LHSExpr, typename RHSExpr>\ | ||||
| struct ConvertToDeviceExpression<CVQual TensorAssignOp<LHSExpr, RHSExpr> >\ | ||||
| : NonOpConversion<TensorAssignOp, Res, LHSExpr, RHSExpr>{}; | ||||
|  | ||||
| ASSIGNCONVERT(const, true) | ||||
| ASSIGNCONVERT(, false) | ||||
| #undef ASSIGNCONVERT | ||||
|  | ||||
| /// specialisation of the \ref ConvertToDeviceExpression struct when the node | ||||
| /// type is either TensorForcedEvalOp or TensorEvalToOp | ||||
| #define KERNELBROKERCONVERT(CVQual, Res, ExprNode)\ | ||||
| template <typename Expr>\ | ||||
| struct ConvertToDeviceExpression<CVQual ExprNode<Expr> > \ | ||||
| : DeviceConvertor<ExprNode, Res, Expr>{}; | ||||
|  | ||||
| KERNELBROKERCONVERT(const, true, TensorForcedEvalOp) | ||||
| KERNELBROKERCONVERT(, false, TensorForcedEvalOp) | ||||
| KERNELBROKERCONVERT(const, true, TensorEvalToOp) | ||||
| KERNELBROKERCONVERT(, false, TensorEvalToOp) | ||||
| #undef KERNELBROKERCONVERT | ||||
|  | ||||
| /// specialisation of the \ref ConvertToDeviceExpression struct when the node type is TensorReductionOp | ||||
| #define KERNELBROKERCONVERTREDUCTION(CVQual)\ | ||||
| template <typename OP, typename Dim, typename subExpr, template <class> class MakePointer_>\ | ||||
| struct ConvertToDeviceExpression<CVQual TensorReductionOp<OP, Dim, subExpr, MakePointer_> > {\ | ||||
|   typedef CVQual TensorReductionOp<OP, Dim, typename ConvertToDeviceExpression<subExpr>::Type, MakeGlobalPointer> Type;\ | ||||
| }; | ||||
|  | ||||
| KERNELBROKERCONVERTREDUCTION(const) | ||||
| KERNELBROKERCONVERTREDUCTION() | ||||
| #undef KERNELBROKERCONVERTREDUCTION | ||||
|  | ||||
| }  // namespace internal | ||||
| }  // namespace TensorSycl | ||||
| }  // namespace Eigen | ||||
|  | ||||
| #endif  // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_CONVERT_TO_DEVICE_EXPRESSION_HPP | ||||
							
								
								
									
239 external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h vendored Normal file
							| @@ -0,0 +1,239 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Mehdi Goli    Codeplay Software Ltd. | ||||
| // Ralph Potter  Codeplay Software Ltd. | ||||
| // Luke Iwanski  Codeplay Software Ltd. | ||||
| // Contact: <eigen@codeplay.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| /***************************************************************** | ||||
|  * TensorSyclExprConstructor.h | ||||
|  * | ||||
|  * \brief: | ||||
|  *  This file re-creates the expression on the SYCL device in order | ||||
|  *  to use the original tensor evaluator. | ||||
|  * | ||||
| *****************************************************************/ | ||||
|  | ||||
| #ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXPR_CONSTRUCTOR_HPP | ||||
| #define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXPR_CONSTRUCTOR_HPP | ||||
|  | ||||
| namespace Eigen { | ||||
| namespace TensorSycl { | ||||
| namespace internal { | ||||
| /// this class is used by TensorEvalToOp in order to create an lhs expression | ||||
| /// which is a pointer obtained from an accessor to a device-only buffer | ||||
| template <typename PtrType, size_t N, typename... Params> | ||||
| struct EvalToLHSConstructor { | ||||
|   PtrType expr; | ||||
|   EvalToLHSConstructor(const utility::tuple::Tuple<Params...> &t): expr((&(*(utility::tuple::get<N>(t).get_pointer())))) {} | ||||
| }; | ||||
|  | ||||
| /// struct ExprConstructor reconstructs the expression on the device, rebuilding | ||||
| /// it with MakeGlobalPointer so that the TensorMap pointers used by the eval | ||||
| /// function live in the device's global address space. | ||||
| /// It is parameterised by the original expression type, the device (placeholder) | ||||
| /// expression type and the accessor types, and is constructed from the extracted | ||||
| /// functors and the tuple of accessors in order to re-instantiate the expression | ||||
| /// tree on the device. | ||||
| template <typename OrigExpr, typename IndexExpr, typename... Params> | ||||
| struct ExprConstructor; | ||||
|  | ||||
| /// specialisation of the \ref ExprConstructor struct when the node type is | ||||
| /// TensorMap | ||||
| #define TENSORMAP(CVQual)\ | ||||
| template <typename Scalar_, int Options_, int Options2_, int Options3_, int NumIndices_, typename IndexType_,\ | ||||
| template <class> class MakePointer_, size_t N, typename... Params>\ | ||||
| struct ExprConstructor< CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakeGlobalPointer>,\ | ||||
| CVQual PlaceHolder<CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options3_, MakePointer_>, N>, Params...>{\ | ||||
|   typedef  CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakeGlobalPointer>  Type;\ | ||||
|   Type expr;\ | ||||
|   template <typename FuncDetector>\ | ||||
|   ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple<Params...> &t)\ | ||||
|   : expr(Type((&(*(utility::tuple::get<N>(t).get_pointer()))), fd.dimensions())) {}\ | ||||
| }; | ||||
|  | ||||
| TENSORMAP(const) | ||||
| TENSORMAP() | ||||
| #undef TENSORMAP | ||||
|  | ||||
| #define UNARYCATEGORY(CVQual)\ | ||||
| template <template<class, class> class UnaryCategory, typename OP, typename OrigRHSExpr, typename RHSExpr, typename... Params>\ | ||||
| struct ExprConstructor<CVQual UnaryCategory<OP, OrigRHSExpr>, CVQual UnaryCategory<OP, RHSExpr>, Params...> {\ | ||||
|   typedef  ExprConstructor<OrigRHSExpr, RHSExpr, Params...> my_type;\ | ||||
|   my_type rhsExpr;\ | ||||
|   typedef CVQual UnaryCategory<OP, typename my_type::Type> Type;\ | ||||
|   Type expr;\ | ||||
|   template <typename FuncDetector>\ | ||||
|   ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\ | ||||
|   : rhsExpr(funcD.rhsExpr, t), expr(rhsExpr.expr, funcD.func) {}\ | ||||
| }; | ||||
|  | ||||
| UNARYCATEGORY(const) | ||||
| UNARYCATEGORY() | ||||
| #undef UNARYCATEGORY | ||||
|  | ||||
| /// specialisation of the \ref ExprConstructor struct when the node type is | ||||
| /// TensorBinaryOp | ||||
| #define BINARYCATEGORY(CVQual)\ | ||||
| template <template<class, class, class> class BinaryCategory, typename OP, typename OrigLHSExpr, typename OrigRHSExpr, typename LHSExpr,\ | ||||
| typename RHSExpr, typename... Params>\ | ||||
| struct ExprConstructor<CVQual BinaryCategory<OP, OrigLHSExpr, OrigRHSExpr>,  CVQual BinaryCategory<OP, LHSExpr, RHSExpr>, Params...> {\ | ||||
|   typedef  ExprConstructor<OrigLHSExpr, LHSExpr, Params...> my_left_type;\ | ||||
|   typedef  ExprConstructor<OrigRHSExpr, RHSExpr, Params...> my_right_type;\ | ||||
|   typedef  CVQual BinaryCategory<OP, typename my_left_type::Type, typename my_right_type::Type> Type;\ | ||||
|   my_left_type lhsExpr;\ | ||||
|   my_right_type rhsExpr;\ | ||||
|   Type expr;\ | ||||
|   template <typename FuncDetector>\ | ||||
|   ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\ | ||||
|   : lhsExpr(funcD.lhsExpr, t),rhsExpr(funcD.rhsExpr, t), expr(lhsExpr.expr, rhsExpr.expr, funcD.func) {}\ | ||||
| }; | ||||
|  | ||||
| BINARYCATEGORY(const) | ||||
| BINARYCATEGORY() | ||||
| #undef BINARYCATEGORY | ||||
|  | ||||
| /// specialisation of the \ref ExprConstructor struct when the node type is | ||||
| /// TensorCwiseTernaryOp | ||||
| #define TERNARYCATEGORY(CVQual)\ | ||||
| template <template <class, class, class, class> class TernaryCategory, typename OP, typename OrigArg1Expr, typename OrigArg2Expr,typename OrigArg3Expr,\ | ||||
| typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename... Params>\ | ||||
| struct ExprConstructor<CVQual TernaryCategory<OP, OrigArg1Expr, OrigArg2Expr, OrigArg3Expr>, CVQual TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Params...> {\ | ||||
|   typedef ExprConstructor<OrigArg1Expr, Arg1Expr, Params...> my_arg1_type;\ | ||||
|   typedef ExprConstructor<OrigArg2Expr, Arg2Expr, Params...> my_arg2_type;\ | ||||
|   typedef ExprConstructor<OrigArg3Expr, Arg3Expr, Params...> my_arg3_type;\ | ||||
|   typedef  CVQual TernaryCategory<OP, typename my_arg1_type::Type, typename my_arg2_type::Type, typename my_arg3_type::Type> Type;\ | ||||
|   my_arg1_type arg1Expr;\ | ||||
|   my_arg2_type arg2Expr;\ | ||||
|   my_arg3_type arg3Expr;\ | ||||
|   Type expr;\ | ||||
|   template <typename FuncDetector>\ | ||||
|   ExprConstructor(FuncDetector &funcD,const utility::tuple::Tuple<Params...> &t)\ | ||||
|   : arg1Expr(funcD.arg1Expr, t), arg2Expr(funcD.arg2Expr, t), arg3Expr(funcD.arg3Expr, t), expr(arg1Expr.expr, arg2Expr.expr, arg3Expr.expr, funcD.func) {}\ | ||||
| }; | ||||
|  | ||||
| TERNARYCATEGORY(const) | ||||
| TERNARYCATEGORY() | ||||
| #undef TERNARYCATEGORY | ||||
|  | ||||
| /// specialisation of the \ref ExprConstructor struct when the node type is | ||||
| /// TensorCwiseSelectOp | ||||
| #define SELECTOP(CVQual)\ | ||||
| template <typename OrigIfExpr, typename OrigThenExpr, typename OrigElseExpr, typename IfExpr, typename ThenExpr, typename ElseExpr, typename... Params>\ | ||||
| struct ExprConstructor< CVQual TensorSelectOp<OrigIfExpr, OrigThenExpr, OrigElseExpr>, CVQual TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Params...> {\ | ||||
|   typedef  ExprConstructor<OrigIfExpr, IfExpr, Params...> my_if_type;\ | ||||
|   typedef  ExprConstructor<OrigThenExpr, ThenExpr, Params...> my_then_type;\ | ||||
|   typedef  ExprConstructor<OrigElseExpr, ElseExpr, Params...> my_else_type;\ | ||||
|   typedef CVQual TensorSelectOp<typename my_if_type::Type, typename my_then_type::Type, typename my_else_type::Type> Type;\ | ||||
|   my_if_type ifExpr;\ | ||||
|   my_then_type thenExpr;\ | ||||
|   my_else_type elseExpr;\ | ||||
|   Type expr;\ | ||||
|   template <typename FuncDetector>\ | ||||
|   ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\ | ||||
|   : ifExpr(funcD.ifExpr, t), thenExpr(funcD.thenExpr, t), elseExpr(funcD.elseExpr, t), expr(ifExpr.expr, thenExpr.expr, elseExpr.expr) {}\ | ||||
| }; | ||||
|  | ||||
| SELECTOP(const) | ||||
| SELECTOP() | ||||
| #undef SELECTOP | ||||
|  | ||||
| /// specialisation of the \ref ExprConstructor struct when the node type is | ||||
| /// const TensorAssignOp | ||||
| #define ASSIGN(CVQual)\ | ||||
| template <typename OrigLHSExpr, typename OrigRHSExpr, typename LHSExpr, typename RHSExpr, typename... Params>\ | ||||
| struct ExprConstructor<CVQual TensorAssignOp<OrigLHSExpr, OrigRHSExpr>,  CVQual TensorAssignOp<LHSExpr, RHSExpr>, Params...> {\ | ||||
|   typedef ExprConstructor<OrigLHSExpr, LHSExpr, Params...> my_left_type;\ | ||||
|   typedef ExprConstructor<OrigRHSExpr, RHSExpr, Params...> my_right_type;\ | ||||
|   typedef CVQual TensorAssignOp<typename my_left_type::Type, typename my_right_type::Type>  Type;\ | ||||
|   my_left_type lhsExpr;\ | ||||
|   my_right_type rhsExpr;\ | ||||
|   Type expr;\ | ||||
|   template <typename FuncDetector>\ | ||||
|   ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\ | ||||
|   : lhsExpr(funcD.lhsExpr, t), rhsExpr(funcD.rhsExpr, t), expr(lhsExpr.expr, rhsExpr.expr) {}\ | ||||
|  }; | ||||
|  | ||||
|  ASSIGN(const) | ||||
|  ASSIGN() | ||||
|  #undef ASSIGN | ||||
| /// specialisation of the \ref ExprConstructor struct when the node type is | ||||
| ///  TensorEvalToOp | ||||
| #define EVALTO(CVQual)\ | ||||
| template <typename OrigExpr, typename Expr, typename... Params>\ | ||||
| struct ExprConstructor<CVQual TensorEvalToOp<OrigExpr, MakeGlobalPointer>, CVQual TensorEvalToOp<Expr>, Params...> {\ | ||||
|   typedef ExprConstructor<OrigExpr, Expr, Params...> my_expr_type;\ | ||||
|   typedef typename TensorEvalToOp<OrigExpr, MakeGlobalPointer>::PointerType my_buffer_type;\ | ||||
|   typedef CVQual TensorEvalToOp<typename my_expr_type::Type, MakeGlobalPointer> Type;\ | ||||
|   my_expr_type nestedExpression;\ | ||||
|   EvalToLHSConstructor<my_buffer_type, 0, Params...> buffer;\ | ||||
|   Type expr;\ | ||||
|   template <typename FuncDetector>\ | ||||
|   ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\ | ||||
|   : nestedExpression(funcD.rhsExpr, t), buffer(t), expr(buffer.expr, nestedExpression.expr) {}\ | ||||
| }; | ||||
|  | ||||
| EVALTO(const) | ||||
| EVALTO() | ||||
| #undef EVALTO | ||||
|  | ||||
| /// specialisation of the \ref ExprConstructor struct when the node type is | ||||
| /// TensorForcedEvalOp | ||||
| #define FORCEDEVAL(CVQual)\ | ||||
| template <typename OrigExpr, typename DevExpr, size_t N, typename... Params>\ | ||||
| struct ExprConstructor<CVQual TensorForcedEvalOp<OrigExpr, MakeGlobalPointer>,\ | ||||
| CVQual PlaceHolder<CVQual TensorForcedEvalOp<DevExpr>, N>, Params...> {\ | ||||
|   typedef CVQual TensorMap<Tensor<typename TensorForcedEvalOp<DevExpr, MakeGlobalPointer>::Scalar,\ | ||||
|   TensorForcedEvalOp<DevExpr, MakeGlobalPointer>::NumDimensions, 0, typename TensorForcedEvalOp<DevExpr>::Index>, 0, MakeGlobalPointer> Type;\ | ||||
|   Type expr;\ | ||||
|   template <typename FuncDetector>\ | ||||
|   ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple<Params...> &t)\ | ||||
|   : expr(Type((&(*(utility::tuple::get<N>(t).get_pointer()))), fd.dimensions())) {}\ | ||||
| }; | ||||
|  | ||||
| FORCEDEVAL(const) | ||||
| FORCEDEVAL() | ||||
| #undef FORCEDEVAL | ||||
|  | ||||
| template <bool Conds,  size_t X , size_t Y > struct ValueCondition { | ||||
|   static const size_t Res =X; | ||||
| }; | ||||
| template<size_t X, size_t Y> struct ValueCondition<false, X , Y> { | ||||
|   static const size_t Res =Y; | ||||
| }; | ||||
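|  | ||||
| // ValueCondition is a small compile-time select: ValueCondition<true, 1, 4>::Res | ||||
| // is 1 and ValueCondition<false, 1, 4>::Res is 4. It is used below to clamp the | ||||
| // rank of a fully reduced (zero-dimensional) result to one. | ||||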
|  | ||||
| /// specialisation of the \ref ExprConstructor struct when the node type is TensorReductionOp | ||||
| #define SYCLREDUCTIONEXPR(CVQual)\ | ||||
| template <typename OP, typename Dim, typename OrigExpr, typename DevExpr, size_t N, typename... Params>\ | ||||
| struct ExprConstructor<CVQual TensorReductionOp<OP, Dim, OrigExpr, MakeGlobalPointer>,\ | ||||
| CVQual PlaceHolder<CVQual TensorReductionOp<OP, Dim, DevExpr>, N>, Params...> {\ | ||||
|   static const size_t NumIndices= ValueCondition< TensorReductionOp<OP, Dim, DevExpr, MakeGlobalPointer>::NumDimensions==0,  1, TensorReductionOp<OP, Dim, DevExpr, MakeGlobalPointer>::NumDimensions >::Res;\ | ||||
|   typedef CVQual TensorMap<Tensor<typename TensorReductionOp<OP, Dim, DevExpr, MakeGlobalPointer>::Scalar,\ | ||||
|   NumIndices, 0, typename TensorReductionOp<OP, Dim, DevExpr>::Index>, 0, MakeGlobalPointer> Type;\ | ||||
|   Type expr;\ | ||||
|   template <typename FuncDetector>\ | ||||
|   ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple<Params...> &t)\ | ||||
|   : expr(Type((&(*(utility::tuple::get<N>(t).get_pointer()))), fd.dimensions())) {}\ | ||||
| }; | ||||
|  | ||||
| SYCLREDUCTIONEXPR(const) | ||||
| SYCLREDUCTIONEXPR() | ||||
| #undef SYCLREDUCTIONEXPR | ||||
|  | ||||
| /// template deduction for \ref ExprConstructor struct | ||||
| template <typename OrigExpr, typename IndexExpr, typename FuncD, typename... Params> | ||||
| auto createDeviceExpression(FuncD &funcD, const utility::tuple::Tuple<Params...> &t) | ||||
|     -> decltype(ExprConstructor<OrigExpr, IndexExpr, Params...>(funcD, t)) { | ||||
|   return ExprConstructor<OrigExpr, IndexExpr, Params...>(funcD, t); | ||||
| } | ||||
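|  | ||||
| // The SYCL kernel code in this module is expected to call | ||||
| //   createDeviceExpression<OrigExpr, PlaceHolderExpr>(functors, tuple_of_accessors) | ||||
| // (type and argument names here are illustrative) and then evaluate the | ||||
| // resulting .expr member with an ordinary TensorEvaluator on the device. | ||||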
|  | ||||
| } // namespace internal | ||||
| } // namespace TensorSycl | ||||
| } // namespace Eigen | ||||
|  | ||||
|  | ||||
| #endif  // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXPR_CONSTRUCTOR_HPP | ||||
							
								
								
									
204 external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h vendored Normal file
							| @@ -0,0 +1,204 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Mehdi Goli    Codeplay Software Ltd. | ||||
| // Ralph Potter  Codeplay Software Ltd. | ||||
| // Luke Iwanski  Codeplay Software Ltd. | ||||
| // Contact: <eigen@codeplay.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| /***************************************************************** | ||||
|  * TensorSyclExtractAccessor.h | ||||
|  * | ||||
|  * \brief: | ||||
|  * ExtractAccessor takes a placeholder expression and the tuple of SYCL | ||||
|  * buffers as input. Using a pre-order tree traversal, ExtractAccessor | ||||
|  * recursively calls itself for the children of each node in the expression | ||||
|  * tree. A leaf node of the placeholder expression is simply a container that | ||||
|  * records the position of the corresponding data in the tuple of SYCL | ||||
|  * buffers. Invoking the accessor extraction for PlaceHolder<N> creates an | ||||
|  * accessor for the N-th buffer in the tuple of buffers; this accessor is then | ||||
|  * added as the N-th element of the tuple of accessors, so the order of the | ||||
|  * data in the expression tree is preserved. | ||||
|  * | ||||
|  * Below are the specialisations of the accessor extraction for the different | ||||
|  * operation types that can appear in the placeholder expression. | ||||
|  * | ||||
| *****************************************************************/ | ||||
|  | ||||
| #ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_ACCESSOR_HPP | ||||
| #define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_ACCESSOR_HPP | ||||
|  | ||||
| namespace Eigen { | ||||
| namespace TensorSycl { | ||||
| namespace internal { | ||||
| /// struct ExtractAccessor: extracts the SYCL accessors for the buffers | ||||
| /// referenced by an evaluator. | ||||
| /// Depending on the type of the leaf node we get either a read accessor or a | ||||
| /// read_write accessor. | ||||
| template <typename Evaluator> | ||||
| struct ExtractAccessor; | ||||
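|  | ||||
| // Typical use (illustrative names): inside a SYCL command group, | ||||
| //   auto tuple_of_accessors = | ||||
| //       ExtractAccessor<TensorEvaluator<Expr, SyclDevice> >::getTuple(cgh, evaluator); | ||||
| // builds the tuple of accessors that is later passed to the kernel lambda. | ||||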
|  | ||||
| struct AccessorConstructor{ | ||||
|   template<typename Arg> static inline auto getTuple(cl::sycl::handler& cgh, Arg eval) | ||||
|   -> decltype(ExtractAccessor<Arg>::getTuple(cgh, eval)) { | ||||
|   return ExtractAccessor<Arg>::getTuple(cgh, eval); | ||||
|   } | ||||
|  | ||||
|   template<typename Arg1, typename Arg2> static inline auto getTuple(cl::sycl::handler& cgh, Arg1 eval1, Arg2 eval2) | ||||
|   -> decltype(utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1), ExtractAccessor<Arg2>::getTuple(cgh, eval2))) { | ||||
|     return utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1), ExtractAccessor<Arg2>::getTuple(cgh, eval2)); | ||||
|   } | ||||
|   template<typename Arg1, typename Arg2, typename Arg3>	static inline auto getTuple(cl::sycl::handler& cgh, Arg1 eval1 , Arg2 eval2 , Arg3 eval3) | ||||
|   -> decltype(utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1),utility::tuple::append(ExtractAccessor<Arg2>::getTuple(cgh, eval2), ExtractAccessor<Arg3>::getTuple(cgh, eval3)))) { | ||||
|     return utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1),utility::tuple::append(ExtractAccessor<Arg2>::getTuple(cgh, eval2), ExtractAccessor<Arg3>::getTuple(cgh, eval3))); | ||||
|   } | ||||
|   template< cl::sycl::access::mode AcM, typename Arg> static inline auto getAccessor(cl::sycl::handler& cgh, Arg eval) | ||||
|   -> decltype(utility::tuple::make_tuple( eval.device().template get_sycl_accessor<AcM, | ||||
|   typename Eigen::internal::remove_all<typename Arg::CoeffReturnType>::type>(eval.dimensions().TotalSize(), cgh,eval.data()))){ | ||||
|     return utility::tuple::make_tuple(eval.device().template get_sycl_accessor<AcM, typename Eigen::internal::remove_all<typename Arg::CoeffReturnType>::type>(eval.dimensions().TotalSize(), cgh,eval.data())); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| /// specialisation of the \ref ExtractAccessor struct when the node type is | ||||
| /// const TensorCwiseNullaryOp, const TensorCwiseUnaryOp and const TensorBroadcastingOp | ||||
| template <template<class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev> | ||||
| struct ExtractAccessor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> > { | ||||
|   static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> eval) | ||||
|   -> decltype(AccessorConstructor::getTuple(cgh, eval.impl())){ | ||||
|     return AccessorConstructor::getTuple(cgh, eval.impl()); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| /// specialisation of the \ref ExtractAccessor struct when the node type is TensorCwiseNullaryOp,  TensorCwiseUnaryOp and  TensorBroadcastingOp | ||||
| template <template<class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev> | ||||
| struct ExtractAccessor<TensorEvaluator<UnaryCategory<OP, RHSExpr>, Dev> > | ||||
| : ExtractAccessor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> > {}; | ||||
|  | ||||
| /// specialisation of the \ref ExtractAccessor struct when the node type is const TensorCwiseBinaryOp | ||||
| template <template<class, class, class> class BinaryCategory, typename OP,  typename LHSExpr, typename RHSExpr, typename Dev> | ||||
| struct ExtractAccessor<TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> > { | ||||
|   static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> eval) | ||||
|   -> decltype(AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl())){ | ||||
|     return AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl()); | ||||
|   } | ||||
| }; | ||||
| /// specialisation of the \ref ExtractAccessor struct when the node type is TensorCwiseBinaryOp | ||||
| template <template<class, class, class> class BinaryCategory, typename OP,  typename LHSExpr, typename RHSExpr, typename Dev> | ||||
| struct ExtractAccessor<TensorEvaluator<BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> > | ||||
| : ExtractAccessor<TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> >{}; | ||||
|  | ||||
| /// specialisation of the \ref ExtractAccessor struct when the node type is | ||||
| /// const TensorCwiseTernaryOp | ||||
| template <template<class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev> | ||||
| struct ExtractAccessor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> > { | ||||
|   static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> eval) | ||||
|   -> decltype(AccessorConstructor::getTuple(cgh, eval.arg1Impl(), eval.arg2Impl(), eval.arg3Impl())){ | ||||
|     return AccessorConstructor::getTuple(cgh, eval.arg1Impl(), eval.arg2Impl(), eval.arg3Impl()); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| /// specialisation of the \ref ExtractAccessor struct when the node type is TensorCwiseTernaryOp | ||||
| template <template<class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev> | ||||
| struct ExtractAccessor<TensorEvaluator<TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> > | ||||
| : ExtractAccessor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> >{}; | ||||
|  | ||||
| /// specialisation of the \ref ExtractAccessor struct when the node type is | ||||
| /// const TensorSelectOp. This is a special case where there is no OP | ||||
| template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev> | ||||
| struct ExtractAccessor<TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > { | ||||
|   static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> eval) | ||||
|   -> decltype(AccessorConstructor::getTuple(cgh, eval.cond_impl(), eval.then_impl(), eval.else_impl())){ | ||||
|     return AccessorConstructor::getTuple(cgh, eval.cond_impl(), eval.then_impl(), eval.else_impl()); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| /// specialisation of the \ref ExtractAccessor struct when the node type is | ||||
| /// TensorSelectOp. This is a special case where there is no OP | ||||
| template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev> | ||||
| struct ExtractAccessor<TensorEvaluator<TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > | ||||
| : ExtractAccessor<TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> >{}; | ||||
|  | ||||
| /// specialisation of the \ref ExtractAccessor struct when the node type is const TensorAssignOp | ||||
| template <typename LHSExpr, typename RHSExpr, typename Dev> | ||||
| struct ExtractAccessor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> > { | ||||
|   static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> eval) | ||||
|   -> decltype(AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl())){ | ||||
|     return AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl()); | ||||
|  } | ||||
| }; | ||||
|  | ||||
| /// specialisation of the \ref ExtractAccessor struct when the node type is TensorAssignOp | ||||
| template <typename LHSExpr, typename RHSExpr, typename Dev> | ||||
| struct ExtractAccessor<TensorEvaluator<TensorAssignOp<LHSExpr, RHSExpr>, Dev> > | ||||
| : ExtractAccessor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> >{}; | ||||
|  | ||||
| /// specialisation of the \ref ExtractAccessor struct when the node type is const TensorMap | ||||
| #define TENSORMAPEXPR(CVQual, ACCType)\ | ||||
| template <typename PlainObjectType, int Options_, typename Dev>\ | ||||
| struct ExtractAccessor<TensorEvaluator<CVQual TensorMap<PlainObjectType, Options_>, Dev> > {\ | ||||
|   static inline auto getTuple(cl::sycl::handler& cgh,const TensorEvaluator<CVQual TensorMap<PlainObjectType, Options_>, Dev> eval)\ | ||||
|   -> decltype(AccessorConstructor::template getAccessor<ACCType>(cgh, eval)){\ | ||||
|     return AccessorConstructor::template getAccessor<ACCType>(cgh, eval);\ | ||||
|   }\ | ||||
| }; | ||||
| TENSORMAPEXPR(const, cl::sycl::access::mode::read) | ||||
| TENSORMAPEXPR(, cl::sycl::access::mode::read_write) | ||||
| #undef TENSORMAPEXPR | ||||
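| // Note (illustrative, not upstream documentation): in the device-side expression | ||||
| // tree the destination of an assignment is a non-const TensorMap, so it receives | ||||
| // a read_write accessor via the second instantiation above, while const TensorMap | ||||
| // input leaves receive read-only accessors via the first one. | ||||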
|  | ||||
| /// specialisation of the \ref ExtractAccessor struct when the node type is const TensorForcedEvalOp | ||||
| template <typename Expr, typename Dev> | ||||
| struct ExtractAccessor<TensorEvaluator<const TensorForcedEvalOp<Expr>, Dev> > { | ||||
|   static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorForcedEvalOp<Expr>, Dev> eval) | ||||
|   -> decltype(AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval)){ | ||||
|     return AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| /// specialisation of the \ref ExtractAccessor struct when the node type is TensorForcedEvalOp | ||||
| template <typename Expr, typename Dev> | ||||
| struct ExtractAccessor<TensorEvaluator<TensorForcedEvalOp<Expr>, Dev> > | ||||
| : ExtractAccessor<TensorEvaluator<const TensorForcedEvalOp<Expr>, Dev> >{}; | ||||
|  | ||||
| /// specialisation of the \ref ExtractAccessor struct when the node type is const TensorEvalToOp | ||||
| template <typename Expr, typename Dev> | ||||
| struct ExtractAccessor<TensorEvaluator<const TensorEvalToOp<Expr>, Dev> > { | ||||
|   static inline auto getTuple(cl::sycl::handler& cgh,const TensorEvaluator<const TensorEvalToOp<Expr>, Dev> eval) | ||||
|   -> decltype(utility::tuple::append(AccessorConstructor::template getAccessor<cl::sycl::access::mode::write>(cgh, eval), AccessorConstructor::getTuple(cgh, eval.impl()))){ | ||||
|     return utility::tuple::append(AccessorConstructor::template getAccessor<cl::sycl::access::mode::write>(cgh, eval), AccessorConstructor::getTuple(cgh, eval.impl())); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| /// specialisation of the \ref ExtractAccessor struct when the node type is TensorEvalToOp | ||||
| template <typename Expr, typename Dev> | ||||
| struct ExtractAccessor<TensorEvaluator<TensorEvalToOp<Expr>, Dev> > | ||||
| : ExtractAccessor<TensorEvaluator<const TensorEvalToOp<Expr>, Dev> >{}; | ||||
|  | ||||
| /// specialisation of the \ref ExtractAccessor struct when the node type is const TensorReductionOp | ||||
| template <typename OP, typename Dim, typename Expr, typename Dev> | ||||
| struct ExtractAccessor<TensorEvaluator<const TensorReductionOp<OP, Dim, Expr>, Dev> > { | ||||
|   static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorReductionOp<OP, Dim, Expr>, Dev> eval) | ||||
|   -> decltype(AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval)){ | ||||
|     return AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| /// specialisation of the \ref ExtractAccessor struct when the node type is TensorReductionOp | ||||
| template <typename OP, typename Dim, typename Expr, typename Dev> | ||||
| struct ExtractAccessor<TensorEvaluator<TensorReductionOp<OP, Dim, Expr>, Dev> > | ||||
| : ExtractAccessor<TensorEvaluator<const TensorReductionOp<OP, Dim, Expr>, Dev> >{}; | ||||
|  | ||||
| /// template deduction for \ref ExtractAccessor | ||||
| template <typename Evaluator> | ||||
| auto createTupleOfAccessors(cl::sycl::handler& cgh, const Evaluator& expr) | ||||
| -> decltype(ExtractAccessor<Evaluator>::getTuple(cgh, expr)) { | ||||
|   return ExtractAccessor<Evaluator>::getTuple(cgh, expr); | ||||
| } | ||||
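| // Illustrative usage sketch (assumption, mirroring the call site in TensorSyclRun.h): | ||||
| // inside a command-group submission the accessors for the whole expression tree | ||||
| // are gathered into a single tuple: | ||||
| // | ||||
| //   dev.m_queue.submit([&](cl::sycl::handler& cgh) { | ||||
| //     auto tuple_of_accessors = createTupleOfAccessors<decltype(evaluator)>(cgh, evaluator); | ||||
| //     // utility::tuple::get<0>(tuple_of_accessors) is the accessor of the output leaf | ||||
| //   }); | ||||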
|  | ||||
| } /// namespace internal | ||||
| } /// namespace TensorSycl | ||||
| } /// namespace Eigen | ||||
| #endif  // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_ACCESSOR_HPP | ||||
							
								
								
									
177  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h  vendored  Normal file
							| @@ -0,0 +1,177 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Mehdi Goli    Codeplay Software Ltd. | ||||
| // Ralph Potter  Codeplay Software Ltd. | ||||
| // Luke Iwanski  Codeplay Software Ltd. | ||||
| // Contact: <eigen@codeplay.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| /***************************************************************** | ||||
|  * TensorSyclExtractFunctors.h | ||||
|  * | ||||
|  * \brief: | ||||
|  *  Used to extract all the functors allocated to each node of the expression | ||||
|  *  tree. | ||||
|  * | ||||
| *****************************************************************/ | ||||
|  | ||||
| #ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_FUNCTORS_HPP | ||||
| #define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_FUNCTORS_HPP | ||||
|  | ||||
| namespace Eigen { | ||||
| namespace TensorSycl { | ||||
| namespace internal { | ||||
| /// struct FunctorExtractor:  This struct is used to extract the functors | ||||
| /// constructed on the host side, so that they can be packed and reused when | ||||
| /// the expression is reconstructed on the device. | ||||
| /// We have to do this because Eigen functors are not stateless, so we cannot | ||||
| /// re-instantiate them on the device; the already-constructed functors have | ||||
| /// to be passed to the device instead. | ||||
| // This struct is used for leaf nodes (TensorMap) and nodes behaving like leaf nodes (TensorForcedEval). | ||||
| template <typename Evaluator> struct FunctorExtractor{ | ||||
|   typedef typename Evaluator::Dimensions Dimensions; | ||||
|   const Dimensions m_dimensions; | ||||
|   const Dimensions& dimensions() const { return m_dimensions; } | ||||
|   FunctorExtractor(const Evaluator& expr) | ||||
|   : m_dimensions(expr.dimensions()) {} | ||||
|  | ||||
| }; | ||||
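| // Illustrative example (assumption, not part of the upstream header): for an | ||||
| // expression such as  out = a * b  evaluated on a SYCL device, the extracted | ||||
| // functors mirror the evaluator tree: | ||||
| //   FunctorExtractor<TensorEvaluator<const TensorAssignOp<Out, const TensorCwiseBinaryOp<MulOp, A, B> >, SyclDevice> > | ||||
| //     .lhsExpr          -> leaf extractor for `out` (stores only its dimensions) | ||||
| //     .rhsExpr.lhsExpr  -> leaf extractor for `a` | ||||
| //     .rhsExpr.rhsExpr  -> leaf extractor for `b` | ||||
| //     .rhsExpr.func     -> the multiplication functor, copied by value | ||||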
|  | ||||
| /// specialisation of the \ref FunctorExtractor struct when the node type is | ||||
| /// const TensorCwiseNullaryOp, const TensorCwiseUnaryOp, and const TensorBroadcastingOp | ||||
| template <template <class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev> | ||||
| struct FunctorExtractor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> > { | ||||
|   FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr; | ||||
|   OP func; | ||||
|   FunctorExtractor(const TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev>& expr) | ||||
|   : rhsExpr(expr.impl()), func(expr.functor()) {} | ||||
| }; | ||||
| /// specialisation of the \ref FunctorExtractor struct when the node type is | ||||
| /// TensorCwiseNullaryOp, TensorCwiseUnaryOp, and TensorBroadcastingOp | ||||
| template <template <class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev> | ||||
| struct FunctorExtractor<TensorEvaluator<UnaryCategory<OP, RHSExpr>, Dev> > | ||||
| : FunctorExtractor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> >{}; | ||||
|  | ||||
| /// specialisation of the \ref FunctorExtractor struct when the node type is | ||||
| /// const TensorCwiseBinaryOp | ||||
| template <template<class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev> | ||||
| struct FunctorExtractor<TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> > { | ||||
|   FunctorExtractor<TensorEvaluator<LHSExpr, Dev> > lhsExpr; | ||||
|   FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr; | ||||
|   OP func; | ||||
|   FunctorExtractor(const TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev>& expr) | ||||
|   : lhsExpr(expr.left_impl()),rhsExpr(expr.right_impl()),func(expr.functor()) {} | ||||
| }; | ||||
|  | ||||
| /// specialisation of the \ref FunctorExtractor struct when the node type is | ||||
| /// TensorCwiseBinaryOp | ||||
| template <template <class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev> | ||||
| struct FunctorExtractor<TensorEvaluator<BinaryCategory<OP,  LHSExpr, RHSExpr>, Dev> > | ||||
| : FunctorExtractor<TensorEvaluator<const BinaryCategory<OP,  LHSExpr, RHSExpr>, Dev> >{}; | ||||
|  | ||||
| /// specialisation of the \ref FunctorExtractor struct when the node type is | ||||
| /// const TensorCwiseTernaryOp | ||||
| template <template <class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr,typename Dev> | ||||
| struct FunctorExtractor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> > { | ||||
|   FunctorExtractor<TensorEvaluator<Arg1Expr, Dev> > arg1Expr; | ||||
|   FunctorExtractor<TensorEvaluator<Arg2Expr, Dev> > arg2Expr; | ||||
|   FunctorExtractor<TensorEvaluator<Arg3Expr, Dev> > arg3Expr; | ||||
|   OP func; | ||||
|   FunctorExtractor(const TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev>& expr) | ||||
|   : arg1Expr(expr.arg1Impl()), arg2Expr(expr.arg2Impl()), arg3Expr(expr.arg3Impl()), func(expr.functor()) {} | ||||
| }; | ||||
|  | ||||
| /// specialisation of the \ref FunctorExtractor struct when the node type is | ||||
| /// TensorCwiseTernaryOp | ||||
| template <template <class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev> | ||||
| struct FunctorExtractor<TensorEvaluator< TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> > | ||||
| :FunctorExtractor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> >{}; | ||||
|  | ||||
| /// specialisation of the \ref FunctorExtractor struct when the node type is | ||||
| /// const TensorSelectOp. This is a specialisation without OP, so it has to be separated. | ||||
| template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev> | ||||
| struct FunctorExtractor< TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > { | ||||
|   FunctorExtractor<TensorEvaluator<IfExpr, Dev> > ifExpr; | ||||
|   FunctorExtractor<TensorEvaluator<ThenExpr, Dev> > thenExpr; | ||||
|   FunctorExtractor<TensorEvaluator<ElseExpr, Dev> > elseExpr; | ||||
|   FunctorExtractor(const TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev>& expr) | ||||
|   : ifExpr(expr.cond_impl()), thenExpr(expr.then_impl()), elseExpr(expr.else_impl()) {} | ||||
| }; | ||||
|  | ||||
| /// specialisation of the \ref FunctorExtractor struct when the node type is | ||||
| /// TensorSelectOp. This is a specialisation without OP, so it has to be separated. | ||||
| template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev> | ||||
| struct FunctorExtractor<TensorEvaluator<TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > | ||||
| :FunctorExtractor< TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > {}; | ||||
|  | ||||
| /// specialisation of the \ref FunctorExtractor struct when the node type is | ||||
| /// const TensorAssignOp. This is a specialisation without OP, so it has to be separated. | ||||
| template <typename LHSExpr, typename RHSExpr, typename Dev> | ||||
| struct FunctorExtractor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> > { | ||||
|   FunctorExtractor<TensorEvaluator<LHSExpr, Dev> > lhsExpr; | ||||
|   FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr; | ||||
|   FunctorExtractor(const TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev>& expr) | ||||
|   : lhsExpr(expr.left_impl()), rhsExpr(expr.right_impl()) {} | ||||
| }; | ||||
|  | ||||
| /// specialisation of the \ref FunctorExtractor struct when the node type is | ||||
| /// TensorAssignOp. This is a specialisation without OP, so it has to be separated. | ||||
| template <typename LHSExpr, typename RHSExpr, typename Dev> | ||||
| struct FunctorExtractor<TensorEvaluator<TensorAssignOp<LHSExpr, RHSExpr>, Dev> > | ||||
| :FunctorExtractor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> >{}; | ||||
|  | ||||
|  | ||||
| /// specialisation of the \ref FunctorExtractor struct when the node type is | ||||
| /// const TensorEvalToOp. This is a specialisation without OP, so it has to be separated. | ||||
| template <typename RHSExpr, typename Dev> | ||||
| struct FunctorExtractor<TensorEvaluator<const TensorEvalToOp<RHSExpr>, Dev> > { | ||||
|   FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr; | ||||
|   FunctorExtractor(const TensorEvaluator<const TensorEvalToOp<RHSExpr>, Dev>& expr) | ||||
|   : rhsExpr(expr.impl()) {} | ||||
| }; | ||||
|  | ||||
| /// specialisation of the \ref FunctorExtractor struct when the node type is | ||||
| /// TensorEvalToOp. This is a specialisation without OP so it has to be separated. | ||||
| template <typename RHSExpr, typename Dev> | ||||
| struct FunctorExtractor<TensorEvaluator<TensorEvalToOp<RHSExpr>, Dev> > | ||||
| : FunctorExtractor<TensorEvaluator<const TensorEvalToOp<RHSExpr>, Dev> > {}; | ||||
|  | ||||
| template<typename Dim, size_t NumOutputDim> struct DimConstr { | ||||
| template<typename InDim> | ||||
|   static inline Dim getDim(InDim dims ) {return dims;} | ||||
| }; | ||||
|  | ||||
| template<typename Dim> struct DimConstr<Dim, 0> { | ||||
|   template<typename InDim> | ||||
|     static inline Dim getDim(InDim dims ) {return Dim(dims.TotalSize());} | ||||
| }; | ||||
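| // Note (illustrative): DimConstr is used by the TensorReductionOp specialisation | ||||
| // below. For a full reduction the evaluator reports NumOutputDims == 0, so the | ||||
| // extracted dimensions are stored as a rank-1 DSizes holding TotalSize(); | ||||
| // otherwise the evaluator's dimensions are forwarded unchanged. | ||||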
|  | ||||
| template<typename Op, typename Dims, typename ArgType, template <class> class MakePointer_, typename Device> | ||||
| struct FunctorExtractor<TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>>{ | ||||
|   typedef TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device> Evaluator; | ||||
|   typedef typename Eigen::internal::conditional<Evaluator::NumOutputDims==0, DSizes<typename Evaluator::Index, 1>, typename Evaluator::Dimensions >::type Dimensions; | ||||
|   const Dimensions m_dimensions; | ||||
|   const Dimensions& dimensions() const { return m_dimensions; } | ||||
|   FunctorExtractor(const TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>& expr) | ||||
|   : m_dimensions(DimConstr<Dimensions, Evaluator::NumOutputDims>::getDim(expr.dimensions())) {} | ||||
| }; | ||||
|  | ||||
|  | ||||
| template<typename Op, typename Dims, typename ArgType, template <class> class MakePointer_, typename Device> | ||||
| struct FunctorExtractor<TensorEvaluator<TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>> | ||||
| : FunctorExtractor<TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>>{}; | ||||
| /// template deduction function for FunctorExtractor | ||||
| template <typename Evaluator> | ||||
| auto inline extractFunctors(const Evaluator& evaluator)-> FunctorExtractor<Evaluator> { | ||||
|   return FunctorExtractor<Evaluator>(evaluator); | ||||
| } | ||||
| }  // namespace internal | ||||
| }  // namespace TensorSycl | ||||
| }  // namespace Eigen | ||||
|  | ||||
| #endif  // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_FUNCTORS_HPP | ||||
							
								
								
									
114  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h  vendored  Normal file
							| @@ -0,0 +1,114 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Mehdi Goli    Codeplay Software Ltd. | ||||
| // Ralph Potter  Codeplay Software Ltd. | ||||
| // Luke Iwanski  Codeplay Software Ltd. | ||||
| // Contact: <eigen@codeplay.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| /***************************************************************** | ||||
|  * TensorSyclLeafCount.h | ||||
|  * | ||||
|  * \brief: | ||||
|  *  The leaf count uses a pre-order traversal of the expression tree in order | ||||
|  *  to count the number of leaf nodes in the expression | ||||
|  * | ||||
| *****************************************************************/ | ||||
|  | ||||
| #ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_LEAF_COUNT_HPP | ||||
| #define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_LEAF_COUNT_HPP | ||||
|  | ||||
| namespace Eigen { | ||||
| namespace TensorSycl { | ||||
| namespace internal { | ||||
| /// \brief LeafCount is used to count terminal nodes. The total number of | ||||
| /// leaf nodes is used by MakePlaceHolderExprHelper to find the order | ||||
| /// of each leaf node in an expression tree at compile time. | ||||
| template <typename Expr> | ||||
| struct LeafCount; | ||||
|  | ||||
| template<typename... Args> struct CategoryCount; | ||||
|  | ||||
| template<> struct CategoryCount<> | ||||
| { | ||||
|   static const size_t Count =0; | ||||
| }; | ||||
|  | ||||
| template<typename Arg, typename... Args> | ||||
| struct CategoryCount<Arg,Args...>{ | ||||
|   static const size_t Count = LeafCount<Arg>::Count + CategoryCount<Args...>::Count; | ||||
| }; | ||||
|  | ||||
| /// specialisation of the \ref LeafCount struct when the node type is const TensorMap | ||||
| template <typename PlainObjectType, int Options_, template <class> class MakePointer_> | ||||
| struct LeafCount<const TensorMap<PlainObjectType, Options_, MakePointer_> > { | ||||
|   static const size_t Count =1; | ||||
| }; | ||||
|  | ||||
| /// specialisation of the \ref LeafCount struct when the node type is TensorMap | ||||
| template <typename PlainObjectType, int Options_, template <class> class MakePointer_> | ||||
| struct LeafCount<TensorMap<PlainObjectType, Options_, MakePointer_> > :LeafCount<const TensorMap<PlainObjectType, Options_, MakePointer_> >{}; | ||||
|  | ||||
| // const TensorCwiseUnaryOp, const TensorCwiseNullaryOp, const TensorCwiseBinaryOp, const TensorCwiseTernaryOp, and const TensorBroadcastingOp | ||||
| template <template <class, class...> class CategoryExpr, typename OP, typename... RHSExpr> | ||||
| struct LeafCount<const CategoryExpr<OP, RHSExpr...> >: CategoryCount<RHSExpr...> {}; | ||||
| // TensorCwiseUnaryOp,  TensorCwiseNullaryOp,  TensorCwiseBinaryOp,  TensorCwiseTernaryOp, and  TensorBroadcastingOp | ||||
| template <template <class, class...> class CategoryExpr, typename OP, typename... RHSExpr> | ||||
| struct LeafCount<CategoryExpr<OP, RHSExpr...> > :LeafCount<const CategoryExpr<OP, RHSExpr...> >{}; | ||||
|  | ||||
| /// specialisation of the \ref LeafCount struct when the node type is const TensorSelectOp. This is an exception since it has no OP. | ||||
| template <typename IfExpr, typename ThenExpr, typename ElseExpr> | ||||
| struct LeafCount<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr> > : CategoryCount<IfExpr, ThenExpr, ElseExpr> {}; | ||||
| /// specialisation of the \ref LeafCount struct when the node type is TensorSelectOp | ||||
| template <typename IfExpr, typename ThenExpr, typename ElseExpr> | ||||
| struct LeafCount<TensorSelectOp<IfExpr, ThenExpr, ElseExpr> >: LeafCount<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr> > {}; | ||||
|  | ||||
|  | ||||
| /// specialisation of the \ref LeafCount struct when the node type is const TensorAssignOp | ||||
| template <typename LHSExpr, typename RHSExpr> | ||||
| struct LeafCount<const TensorAssignOp<LHSExpr, RHSExpr> >: CategoryCount<LHSExpr,RHSExpr> {}; | ||||
|  | ||||
| /// specialisation of the \ref LeafCount struct when the node type is | ||||
| /// TensorAssignOp. This is an exception: it is not the same as the unary case. | ||||
| template <typename LHSExpr, typename RHSExpr> | ||||
| struct LeafCount<TensorAssignOp<LHSExpr, RHSExpr> > :LeafCount<const TensorAssignOp<LHSExpr, RHSExpr> >{}; | ||||
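| // Worked example (illustrative): for an expression of the form | ||||
| //   const TensorAssignOp<TensorMap, const TensorCwiseBinaryOp<OP, TensorMap, TensorMap> > | ||||
| // the count is CategoryCount<LHS, RHS>::Count | ||||
| //   = LeafCount<TensorMap>::Count (1) + CategoryCount<TensorMap, TensorMap>::Count (2) = 3. | ||||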
|  | ||||
| /// specialisation of the \ref LeafCount struct when the node type is const TensorForcedEvalOp | ||||
| template <typename Expr> | ||||
| struct LeafCount<const TensorForcedEvalOp<Expr> > { | ||||
|     static const size_t Count =1; | ||||
| }; | ||||
|  | ||||
| /// specialisation of the \ref LeafCount struct when the node type is TensorForcedEvalOp | ||||
| template <typename Expr> | ||||
| struct LeafCount<TensorForcedEvalOp<Expr> >: LeafCount<const TensorForcedEvalOp<Expr> > {}; | ||||
|  | ||||
| /// specialisation of the \ref LeafCount struct when the node type is const TensorEvalToOp | ||||
| template <typename Expr> | ||||
| struct LeafCount<const TensorEvalToOp<Expr> > { | ||||
|   static const size_t Count = 1 + CategoryCount<Expr>::Count; | ||||
| }; | ||||
|  | ||||
| /// specialisation of the \ref LeafCount struct when the node type is const TensorReductionOp | ||||
| template <typename OP, typename Dim, typename Expr> | ||||
| struct LeafCount<const TensorReductionOp<OP, Dim, Expr> > { | ||||
|     static const size_t Count =1; | ||||
| }; | ||||
|  | ||||
| /// specialisation of the \ref LeafCount struct when the node type is TensorReductionOp | ||||
| template <typename OP, typename Dim, typename Expr> | ||||
| struct LeafCount<TensorReductionOp<OP, Dim, Expr> >: LeafCount<const TensorReductionOp<OP, Dim, Expr> >{}; | ||||
|  | ||||
| /// specialisation of the \ref LeafCount struct when the node type is TensorEvalToOp | ||||
| template <typename Expr> | ||||
| struct LeafCount<TensorEvalToOp<Expr> >: LeafCount<const TensorEvalToOp<Expr> >{}; | ||||
|  | ||||
| } /// namespace internal | ||||
| } /// namespace TensorSycl | ||||
| } /// namespace Eigen | ||||
|  | ||||
| #endif  // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_LEAF_COUNT_HPP | ||||
							
								
								
									
181  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h  vendored  Normal file
							| @@ -0,0 +1,181 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Mehdi Goli    Codeplay Software Ltd. | ||||
| // Ralph Potter  Codeplay Software Ltd. | ||||
| // Luke Iwanski  Codeplay Software Ltd. | ||||
| // Contact: <eigen@codeplay.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| /***************************************************************** | ||||
|  * TensorSyclPlaceHolderExpr.h | ||||
|  * | ||||
|  * \brief: | ||||
|  *  This is the specialisation of the placeholder expression based on the | ||||
|  * operation type | ||||
|  * | ||||
| *****************************************************************/ | ||||
|  | ||||
| #ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_PLACEHOLDER_EXPR_HPP | ||||
| #define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_PLACEHOLDER_EXPR_HPP | ||||
|  | ||||
| namespace Eigen { | ||||
| namespace TensorSycl { | ||||
| namespace internal { | ||||
|  | ||||
| /// \struct PlaceHolder | ||||
| /// \brief PlaceHolder is used to replace the \ref TensorMap in the expression | ||||
| /// tree. | ||||
| /// PlaceHolder contains the order of the leaf node in the expression tree. | ||||
| template <typename Scalar, size_t N> | ||||
| struct PlaceHolder { | ||||
|   static constexpr size_t I = N; | ||||
|   typedef Scalar Type; | ||||
| }; | ||||
|  | ||||
| /// \struct PlaceHolderExpression | ||||
| /// \brief it is used to create the PlaceHolder expression. The PlaceHolder | ||||
| /// expression is a copy of the expression type in which each TensorMap of the | ||||
| /// expression has been replaced with a PlaceHolder. | ||||
| template <typename Expr, size_t N> | ||||
| struct PlaceHolderExpression; | ||||
|  | ||||
| template<size_t N, typename... Args> | ||||
| struct CalculateIndex; | ||||
|  | ||||
| template<size_t N, typename Arg> | ||||
| struct CalculateIndex<N, Arg>{ | ||||
|   typedef typename PlaceHolderExpression<Arg, N>::Type ArgType; | ||||
|   typedef utility::tuple::Tuple<ArgType> ArgsTuple; | ||||
| }; | ||||
|  | ||||
| template<size_t N, typename Arg1, typename Arg2> | ||||
| struct CalculateIndex<N, Arg1, Arg2>{ | ||||
|   static const size_t Arg2LeafCount = LeafCount<Arg2>::Count; | ||||
|   typedef typename PlaceHolderExpression<Arg1, N - Arg2LeafCount>::Type Arg1Type; | ||||
|   typedef typename PlaceHolderExpression<Arg2, N>::Type Arg2Type; | ||||
|   typedef utility::tuple::Tuple<Arg1Type, Arg2Type> ArgsTuple; | ||||
| }; | ||||
|  | ||||
| template<size_t N, typename Arg1, typename Arg2, typename Arg3> | ||||
| struct CalculateIndex<N, Arg1, Arg2, Arg3> { | ||||
|   static const size_t Arg3LeafCount = LeafCount<Arg3>::Count; | ||||
|   static const size_t Arg2LeafCount = LeafCount<Arg2>::Count; | ||||
|   typedef typename PlaceHolderExpression<Arg1, N - Arg3LeafCount - Arg2LeafCount>::Type Arg1Type; | ||||
|   typedef typename PlaceHolderExpression<Arg2, N - Arg3LeafCount>::Type Arg2Type; | ||||
|   typedef typename PlaceHolderExpression<Arg3, N>::Type Arg3Type; | ||||
|   typedef utility::tuple::Tuple<Arg1Type, Arg2Type, Arg3Type> ArgsTuple; | ||||
| }; | ||||
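| // Worked example (illustrative): starting from the root with N = TotalLeaves - 1, | ||||
| // the right-most argument keeps N and the arguments to its left are shifted down | ||||
| // by the leaf counts of the subtrees on their right. For  assign(m0, m1 * m2) | ||||
| // (3 leaves, so N = 2 at the root): | ||||
| //   the RHS binary op gets N = 2  ->  m2 becomes PlaceHolder<..., 2>, m1 PlaceHolder<..., 1> | ||||
| //   the LHS gets N - LeafCount<RHS> = 0  ->  m0 becomes PlaceHolder<..., 0> | ||||
| // so the leaves end up numbered 0..TotalLeaves-1 from left to right. | ||||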
|  | ||||
| template<template<class...> class Category , class OP, class TPL> | ||||
| struct CategoryHelper; | ||||
|  | ||||
| template<template<class...> class Category , class OP, class ...T > | ||||
| struct CategoryHelper<Category, OP, utility::tuple::Tuple<T...> > { | ||||
|   typedef Category<OP, T... > Type; | ||||
| }; | ||||
|  | ||||
| template<template<class...> class Category , class ...T > | ||||
| struct CategoryHelper<Category, NoOP, utility::tuple::Tuple<T...> > { | ||||
|   typedef Category<T... > Type; | ||||
| }; | ||||
|  | ||||
| /// specialisation of the \ref PlaceHolderExpression when the node is | ||||
| /// TensorCwiseNullaryOp, TensorCwiseUnaryOp, TensorBroadcastingOp, TensorCwiseBinaryOp,  TensorCwiseTernaryOp | ||||
| #define OPEXPRCATEGORY(CVQual)\ | ||||
| template <template <class, class... > class Category, typename OP, typename... SubExpr, size_t N>\ | ||||
| struct PlaceHolderExpression<CVQual Category<OP, SubExpr...>, N>{\ | ||||
|   typedef CVQual typename CategoryHelper<Category, OP, typename CalculateIndex<N, SubExpr...>::ArgsTuple>::Type Type;\ | ||||
| }; | ||||
|  | ||||
| OPEXPRCATEGORY(const) | ||||
| OPEXPRCATEGORY() | ||||
| #undef OPEXPRCATEGORY | ||||
|  | ||||
| /// specialisation of the \ref PlaceHolderExpression when the node is | ||||
| /// TensorSelectOp | ||||
| #define SELECTEXPR(CVQual)\ | ||||
| template <typename IfExpr, typename ThenExpr, typename ElseExpr, size_t N>\ | ||||
| struct PlaceHolderExpression<CVQual TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, N> {\ | ||||
|   typedef CVQual typename CategoryHelper<TensorSelectOp, NoOP, typename CalculateIndex<N, IfExpr, ThenExpr, ElseExpr>::ArgsTuple>::Type Type;\ | ||||
| }; | ||||
|  | ||||
| SELECTEXPR(const) | ||||
| SELECTEXPR() | ||||
| #undef SELECTEXPR | ||||
|  | ||||
| /// specialisation of the \ref PlaceHolderExpression when the node is | ||||
| /// TensorAssignOp | ||||
| #define ASSIGNEXPR(CVQual)\ | ||||
| template <typename LHSExpr, typename RHSExpr, size_t N>\ | ||||
| struct PlaceHolderExpression<CVQual TensorAssignOp<LHSExpr, RHSExpr>, N> {\ | ||||
|   typedef CVQual typename CategoryHelper<TensorAssignOp, NoOP, typename CalculateIndex<N, LHSExpr, RHSExpr>::ArgsTuple>::Type Type;\ | ||||
| }; | ||||
|  | ||||
| ASSIGNEXPR(const) | ||||
| ASSIGNEXPR() | ||||
| #undef ASSIGNEXPR | ||||
|  | ||||
| /// specialisation of the \ref PlaceHolderExpression when the node is | ||||
| /// TensorMap | ||||
| #define TENSORMAPEXPR(CVQual)\ | ||||
| template <typename Scalar_, int Options_, int Options2_, int NumIndices_, typename IndexType_, template <class> class MakePointer_, size_t N>\ | ||||
| struct PlaceHolderExpression< CVQual TensorMap< Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakePointer_>, N> {\ | ||||
|   typedef CVQual PlaceHolder<CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakePointer_>, N> Type;\ | ||||
| }; | ||||
|  | ||||
| TENSORMAPEXPR(const) | ||||
| TENSORMAPEXPR() | ||||
| #undef TENSORMAPEXPR | ||||
|  | ||||
| /// specialisation of the \ref PlaceHolderExpression when the node is | ||||
| /// TensorForcedEvalOp | ||||
| #define FORCEDEVAL(CVQual)\ | ||||
| template <typename Expr, size_t N>\ | ||||
| struct PlaceHolderExpression<CVQual TensorForcedEvalOp<Expr>, N> {\ | ||||
|   typedef CVQual PlaceHolder<CVQual TensorForcedEvalOp<Expr>, N> Type;\ | ||||
| }; | ||||
|  | ||||
| FORCEDEVAL(const) | ||||
| FORCEDEVAL() | ||||
| #undef FORCEDEVAL | ||||
|  | ||||
| /// specialisation of the \ref PlaceHolderExpression when the node is | ||||
| /// TensorEvalToOp | ||||
| #define EVALTO(CVQual)\ | ||||
| template <typename Expr, size_t N>\ | ||||
| struct PlaceHolderExpression<CVQual TensorEvalToOp<Expr>, N> {\ | ||||
|   typedef CVQual TensorEvalToOp<typename CalculateIndex <N, Expr>::ArgType> Type;\ | ||||
| }; | ||||
|  | ||||
| EVALTO(const) | ||||
| EVALTO() | ||||
| #undef EVALTO | ||||
|  | ||||
|  | ||||
| /// specialisation of the \ref PlaceHolderExpression when the node is | ||||
| /// TensorReductionOp | ||||
| #define SYCLREDUCTION(CVQual)\ | ||||
| template <typename OP, typename Dims, typename Expr, size_t N>\ | ||||
| struct PlaceHolderExpression<CVQual TensorReductionOp<OP, Dims, Expr>, N>{\ | ||||
|   typedef CVQual PlaceHolder<CVQual TensorReductionOp<OP, Dims,Expr>, N> Type;\ | ||||
| }; | ||||
| SYCLREDUCTION(const) | ||||
| SYCLREDUCTION() | ||||
| #undef SYCLREDUCTION | ||||
|  | ||||
| /// template deduction for \ref PlaceHolderExpression struct | ||||
| template <typename Expr> | ||||
| struct createPlaceHolderExpression { | ||||
|   static const size_t TotalLeaves = LeafCount<Expr>::Count; | ||||
|   typedef typename PlaceHolderExpression<Expr, TotalLeaves - 1>::Type Type; | ||||
| }; | ||||
|  | ||||
| }  // internal | ||||
| }  // TensorSycl | ||||
| }  // namespace Eigen | ||||
|  | ||||
| #endif  // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_PLACEHOLDER_EXPR_HPP | ||||
							
								
								
									
70  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h  vendored  Normal file
							| @@ -0,0 +1,70 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Mehdi Goli    Codeplay Software Ltd. | ||||
| // Ralph Potter  Codeplay Software Ltd. | ||||
| // Luke Iwanski  Codeplay Software Ltd. | ||||
| // Cummins Chris PhD student at The University of Edinburgh. | ||||
| // Contact: <eigen@codeplay.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| /***************************************************************** | ||||
|  * TensorSyclRun.h | ||||
|  * | ||||
|  * \brief: | ||||
|  * Schedule_kernel invokes a specialised version of the kernel struct. The | ||||
|  * specialisation is based on the data dimension in the SYCL buffer. | ||||
|  * | ||||
| *****************************************************************/ | ||||
|  | ||||
| #ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP | ||||
| #define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP | ||||
|  | ||||
| namespace Eigen { | ||||
| namespace TensorSycl { | ||||
| /// The run function in TensorSycl converts the expression tree to a | ||||
| /// buffer-based expression tree, | ||||
| /// creates the expression tree for the device with accessors to the buffers, | ||||
| /// and constructs the kernel and submits it to the SYCL queue. | ||||
| template <typename Expr, typename Dev> | ||||
| void run(Expr &expr, Dev &dev) { | ||||
|   Eigen::TensorEvaluator<Expr, Dev> evaluator(expr, dev); | ||||
|   const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); | ||||
|   if (needs_assign) { | ||||
|     typedef  typename internal::createPlaceHolderExpression<Expr>::Type PlaceHolderExpr; | ||||
|     auto functors = internal::extractFunctors(evaluator); | ||||
|  | ||||
|     size_t tileSize =dev.m_queue.get_device(). template get_info<cl::sycl::info::device::max_work_group_size>()/2; | ||||
|     dev.m_queue.submit([&](cl::sycl::handler &cgh) { | ||||
|  | ||||
|       // create a tuple of accessors from Evaluator | ||||
|       auto tuple_of_accessors = internal::createTupleOfAccessors<decltype(evaluator)>(cgh, evaluator); | ||||
|       const auto range = utility::tuple::get<0>(tuple_of_accessors).get_range()[0]; | ||||
|       size_t GRange=range; | ||||
|       if (tileSize>GRange) tileSize=GRange; | ||||
|       else if(GRange>tileSize){ | ||||
|         size_t xMode = GRange % tileSize; | ||||
|         if (xMode != 0) GRange += (tileSize - xMode); | ||||
|       } | ||||
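|       // GRange has been rounded up to a multiple of tileSize so that the nd_range is | ||||
|       // valid; the padded work-items are masked out inside the kernel by the | ||||
|       // itemID.get_global_linear_id() < range check below. | ||||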
|       // run the kernel | ||||
|       cgh.parallel_for<PlaceHolderExpr>( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), [=](cl::sycl::nd_item<1> itemID) { | ||||
|         typedef  typename internal::ConvertToDeviceExpression<Expr>::Type DevExpr; | ||||
|         auto device_expr =internal::createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors); | ||||
|         auto device_evaluator = Eigen::TensorEvaluator<decltype(device_expr.expr), Eigen::DefaultDevice>(device_expr.expr, Eigen::DefaultDevice()); | ||||
|         if (itemID.get_global_linear_id() < range) { | ||||
|           device_evaluator.evalScalar(static_cast<int>(itemID.get_global_linear_id())); | ||||
|         } | ||||
|       }); | ||||
|     }); | ||||
|     dev.m_queue.throw_asynchronous(); | ||||
|   } | ||||
|  | ||||
|   evaluator.cleanup(); | ||||
| } | ||||
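| // Illustrative only (an assumption about the call site, which lives elsewhere in | ||||
| // the Tensor module): an expression assigned through a SYCL device, e.g. | ||||
| //   out.device(sycl_device) = in1 + in2; | ||||
| // eventually reaches this entry point roughly as | ||||
| //   TensorSycl::run(assign_expr, sycl_device); | ||||
| // where assign_expr is the TensorAssignOp built from the statement above. | ||||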
| }  // namespace TensorSycl | ||||
| }  // namespace Eigen | ||||
|  | ||||
| #endif  // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP | ||||
							
								
								
									
237  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h  vendored  Normal file
							| @@ -0,0 +1,237 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Mehdi Goli    Codeplay Software Ltd. | ||||
| // Ralph Potter  Codeplay Software Ltd. | ||||
| // Luke Iwanski  Codeplay Software Ltd. | ||||
| // Contact: <eigen@codeplay.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| /***************************************************************** | ||||
|  * TensorSyclTuple.h | ||||
|  * | ||||
|  * \brief: | ||||
|  *  Minimal implementation of std::tuple that can be used inside a SYCL kernel. | ||||
|  * | ||||
| *****************************************************************/ | ||||
|  | ||||
| #ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_TUPLE_HPP | ||||
| #define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_TUPLE_HPP | ||||
| namespace utility { | ||||
| namespace tuple { | ||||
| /// \struct StaticIf | ||||
| /// \brief The StaticIf struct is used to statically choose the type based on the | ||||
| /// condition. | ||||
| template <bool, typename T = void> struct StaticIf; | ||||
| /// \brief specialisation of the \ref StaticIf when the condition is true | ||||
| template <typename T> | ||||
| struct StaticIf<true, T> { | ||||
|   typedef T type; | ||||
| }; | ||||
|  | ||||
| /// \struct Tuple | ||||
| /// \brief is a fixed-size collection of heterogeneous values | ||||
| /// \tparam Ts...	-	the types of the elements that the tuple stores. | ||||
| /// Empty list is supported. | ||||
| template <class... Ts> | ||||
| struct Tuple {}; | ||||
|  | ||||
| /// \brief specialisation of the \ref Tuple class when the tuple has at least | ||||
| /// one element. | ||||
| /// \tparam T : the type of the first element in the tuple. | ||||
| /// \tparam Ts... the rest of the elements in the tuple. Ts... can be empty. | ||||
| template <class T, class... Ts> | ||||
| struct Tuple<T, Ts...> { | ||||
|   Tuple(T t, Ts... ts) : head(t), tail(ts...) {} | ||||
|   T head; | ||||
|   Tuple<Ts...> tail; | ||||
| }; | ||||
|  | ||||
| /// \struct ElemTypeHolder | ||||
| /// \brief ElemTypeHolder class is used to specify the types of the | ||||
| /// elements inside the tuple | ||||
| /// \tparam size_t the number of elements inside the tuple | ||||
| /// \tparam class the tuple class | ||||
| template <size_t, class> | ||||
| struct ElemTypeHolder; | ||||
|  | ||||
| /// \brief specialisation of the \ref ElemTypeHolder class when the requested | ||||
| /// element index is 0: the type is that of the first element in the tuple | ||||
| template <class T, class... Ts> | ||||
| struct ElemTypeHolder<0, Tuple<T, Ts...> > { | ||||
|   typedef T type; | ||||
| }; | ||||
|  | ||||
| /// \brief specialisation of the \ref ElemTypeHolder class when the requested | ||||
| /// element index k is bigger than 0. It recursively calls itself to | ||||
| /// detect the type of the k-th element in the tuple | ||||
| /// \tparam T : the type of the first element in the tuple. | ||||
| /// \tparam Ts... the rest of the elements in the tuple. Ts... can be empty. | ||||
| /// \tparam K is the Kth element in the tuple | ||||
| template <size_t k, class T, class... Ts> | ||||
| struct ElemTypeHolder<k, Tuple<T, Ts...> > { | ||||
|   typedef typename ElemTypeHolder<k - 1, Tuple<Ts...> >::type type; | ||||
| }; | ||||
|  | ||||
| /// get | ||||
| /// \brief Extracts the first element from the tuple. | ||||
| /// K=0 represents the first element of the tuple. The tuple cannot be empty. | ||||
| /// \tparam Ts... are the type of the elements in the tuple. | ||||
| /// \param t is the tuple whose contents to extract | ||||
| /// \return  typename ElemTypeHolder<0, Tuple<Ts...> >::type &>::type | ||||
|  | ||||
| #define TERMINATE_CONDS_TUPLE_GET(CVQual) \ | ||||
| template <size_t k, class... Ts> \ | ||||
| typename StaticIf<k == 0, CVQual typename ElemTypeHolder<0, Tuple<Ts...> >::type &>::type \ | ||||
| get(CVQual Tuple<Ts...> &t) { \ | ||||
|   static_assert(sizeof...(Ts)!=0, "The requested value is bigger than the size of the tuple"); \ | ||||
|   return t.head; \ | ||||
| } | ||||
|  | ||||
| TERMINATE_CONDS_TUPLE_GET(const) | ||||
| TERMINATE_CONDS_TUPLE_GET() | ||||
| #undef TERMINATE_CONDS_TUPLE_GET | ||||
| /// get | ||||
| /// \brief Extracts the Kth element from the tuple. | ||||
| ///\tparam K is an integer value in [0,sizeof...(Types)). | ||||
| /// \tparam T is the (sizeof...(Types) -(K+1)) element in the tuple | ||||
| /// \tparam Ts... are the type of the elements  in the tuple. | ||||
| /// \param t is the tuple whose contents to extract | ||||
| /// \return  typename ElemTypeHolder<K, Tuple<Ts...> >::type &>::type | ||||
| #define RECURSIVE_TUPLE_GET(CVQual) \ | ||||
| template <size_t k, class T, class... Ts> \ | ||||
| typename StaticIf<k != 0, CVQual typename ElemTypeHolder<k, Tuple<T, Ts...> >::type &>::type \ | ||||
| get(CVQual Tuple<T, Ts...> &t) { \ | ||||
|   return utility::tuple::get<k - 1>(t.tail); \ | ||||
| } | ||||
| RECURSIVE_TUPLE_GET(const) | ||||
| RECURSIVE_TUPLE_GET() | ||||
| #undef RECURSIVE_TUPLE_GET | ||||
|  | ||||
| /// make_tuple | ||||
| /// \brief Creates a tuple object, deducing the target type from the types of | ||||
| /// arguments. | ||||
| /// \tparam Args the type of the arguments to construct the tuple from | ||||
| /// \param args zero or more arguments to construct the tuple from | ||||
| /// \return Tuple<Args...> | ||||
| template <typename... Args> | ||||
| Tuple<Args...> make_tuple(Args... args) { | ||||
|   return Tuple<Args...>(args...); | ||||
| } | ||||
|  | ||||
| /// size | ||||
| /// \brief Provides access to the number of elements in a tuple as a | ||||
| /// compile-time constant expression. | ||||
| /// \tparam Args the type of the arguments to construct the tuple from | ||||
| /// \return size_t | ||||
| template <typename... Args> | ||||
| static constexpr size_t size(Tuple<Args...> &) { | ||||
|   return sizeof...(Args); | ||||
| } | ||||
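| // Illustrative usage (assumption, not part of the header): | ||||
| //   auto t = utility::tuple::make_tuple(1, 2.0f, 'c');  // Tuple<int, float, char> | ||||
| //   int  i = utility::tuple::get<0>(t);                 // 1 | ||||
| //   char c = utility::tuple::get<2>(t);                 // 'c' | ||||
| //   size(t) yields 3; it depends only on the parameter pack, not on the values. | ||||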
|  | ||||
| /// \struct IndexList | ||||
| /// \brief Creates a list of indices from the elements in the tuple | ||||
| /// \tparam Is... a list of indices from [0 to sizeof...(tuple elements)) | ||||
| template <size_t... Is> | ||||
| struct IndexList {}; | ||||
|  | ||||
| /// \struct RangeBuilder | ||||
| /// \brief Collects internal details for generating index ranges [MIN, MAX) | ||||
| /// Declare primary template for index range builder | ||||
| /// \tparam MIN is the starting index in the tuple | ||||
| /// \tparam N represents sizeof...(elements) - sizeof...(Is) | ||||
| /// \tparam Is... are the list of generated index so far | ||||
| template <size_t MIN, size_t N, size_t... Is> | ||||
| struct RangeBuilder; | ||||
|  | ||||
| // FIXME Doxygen has problems with recursive inheritance | ||||
| #ifndef EIGEN_PARSED_BY_DOXYGEN | ||||
| /// \brief base step: Specialisation of the \ref RangeBuilder when | ||||
| /// N==MIN. At this point Is... holds the generated index list [MIN, MAX) | ||||
| /// \tparam MIN is the starting index of the range | ||||
| /// \tparam Is... is the generated index list [MIN, MAX) | ||||
| template <size_t MIN, size_t... Is> | ||||
| struct RangeBuilder<MIN, MIN, Is...> { | ||||
|   typedef IndexList<Is...> type; | ||||
| }; | ||||
|  | ||||
| /// Induction step: Specialisation of the RangeBuilder class when N!=MIN | ||||
| /// in this case we are recursively subtracting N by one and adding one | ||||
| /// index to Is... list until MIN==N | ||||
| /// \tparam MIN is the starting index in the tuple | ||||
| /// \tparam N represents sizeof...(elements) - sizeof...(Is) | ||||
| /// \tparam Is... are the list of generated index so far | ||||
| template <size_t MIN, size_t N, size_t... Is> | ||||
| struct RangeBuilder : public RangeBuilder<MIN, N - 1, N - 1, Is...> {}; | ||||
| #endif // EIGEN_PARSED_BY_DOXYGEN | ||||
|  | ||||
| /// \brief IndexRange that returns a [MIN, MAX) index range | ||||
| /// \tparam MIN is the starting index in the tuple | ||||
| /// \tparam MAX is the size of the tuple | ||||
| template <size_t MIN, size_t MAX> | ||||
| struct IndexRange: RangeBuilder<MIN, MAX>::type {}; | ||||
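| // Illustrative expansion: IndexRange<0, 3> derives from | ||||
| //   RangeBuilder<0, 3> -> RangeBuilder<0, 2, 2> -> RangeBuilder<0, 1, 1, 2> | ||||
| //   -> RangeBuilder<0, 0, 0, 1, 2>, | ||||
| // whose nested type is IndexList<0, 1, 2>. | ||||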
|  | ||||
| /// append_base | ||||
| /// \brief unpacks the elements of the input tuple t and creates a new tuple | ||||
| /// by adding element a at the end of it. | ||||
| /// \tparam Args... the type of the elements inside the tuple t | ||||
| /// \tparam T the type of the new element going to be added at the end of tuple | ||||
| /// \tparam I... is the list of indices from [0 to sizeof...(t)) | ||||
| /// \param t the tuple on which we want to append a. | ||||
| /// \param a the new element going to be added to the tuple | ||||
| /// \return Tuple<Args..., T> | ||||
| template <typename... Args, typename T, size_t... I> | ||||
| Tuple<Args..., T> append_base(Tuple<Args...> t, T a,IndexList<I...>) { | ||||
|   return utility::tuple::make_tuple(get<I>(t)..., a); | ||||
| } | ||||
|  | ||||
| /// append | ||||
| /// \brief the deduction function for \ref append_base that automatically | ||||
| /// generates the \ref IndexRange | ||||
| ///\tparam Args... the type of the elements inside the tuple t | ||||
| /// \tparam T the type of the new element going to be added at the end of tuple | ||||
| /// \param t the tuple on which we want to append a. | ||||
| /// \param a the new element going to be added to the tuple | ||||
| /// \return Tuple<Args..., T> | ||||
| template <typename... Args, typename T> | ||||
| Tuple<Args..., T> append(Tuple<Args...> t, T a) { | ||||
|   return utility::tuple::append_base(t, a,  IndexRange<0, sizeof...(Args)>()); | ||||
| } | ||||
|  | ||||
| /// append_base | ||||
| /// \brief This is a specialisation of \ref append_base when we want to | ||||
| /// concatenate | ||||
| /// tuple t2 at the end of the tuple t1. Here we unpack both tuples, generate the | ||||
| /// IndexRange for each of them and create an output tuple that contains the | ||||
| /// elements of both t1 and t2. | ||||
| ///\tparam Args1... the type of the elements inside the tuple t1 | ||||
| ///\tparam Args2... the type of the elements inside the tuple t2 | ||||
| /// \tparam I1... is the list of index from [0 to sizeof...(t1)) | ||||
| /// \tparam I2... is the list of index from [0 to sizeof...(t2)) | ||||
| /// \param t1 is the tuple on which we want to append t2. | ||||
| /// \param t2 is the tuple that is going to be added on t1. | ||||
| /// \return Tuple<Args1..., Args2...> | ||||
| template <typename... Args1, typename... Args2, size_t... I1, size_t... I2> | ||||
| Tuple<Args1..., Args2...> append_base(Tuple<Args1...> t1, Tuple<Args2...> t2, IndexList<I1...>, IndexList<I2...>) { | ||||
|   return utility::tuple::make_tuple(get<I1>(t1)...,get<I2>(t2)...); | ||||
| } | ||||
|  | ||||
| /// append | ||||
| /// \brief deduction function for \ref append_base when we are appending tuple | ||||
| /// t2 to tuple t1. In this case the \ref IndexRange for both tuples is | ||||
| /// automatically generated. | ||||
| ///\tparam Args1... the type of the elements inside the tuple t1 | ||||
| ///\tparam Args2... the type of the elements inside the tuple t2 | ||||
| /// \param t1 is the tuple on which we want to append t2. | ||||
| /// \param t2 is the tuple that is going to be added on t1. | ||||
| /// \return Tuple<Args1..., Args2...> | ||||
| template <typename... Args1, typename... Args2> | ||||
| Tuple<Args1..., Args2...> append(Tuple<Args1...> t1,Tuple<Args2...> t2) { | ||||
|   return utility::tuple::append_base(t1, t2, IndexRange<0, sizeof...(Args1)>(), IndexRange<0, sizeof...(Args2)>()); | ||||
| } | ||||
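| // Illustrative usage (assumption, not part of the header): | ||||
| //   auto t  = utility::tuple::make_tuple(1, 2);   // Tuple<int, int> | ||||
| //   auto t3 = utility::tuple::append(t, 3.0);     // Tuple<int, int, double> | ||||
| //   auto t5 = utility::tuple::append(t, t3);      // Tuple<int, int, int, int, double> | ||||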
| }  // tuple | ||||
| }  // utility | ||||
| #endif  // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_TUPLE_HPP | ||||
							
								
								
									
272  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h  vendored  Normal file
							| @@ -0,0 +1,272 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_TRAITS_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_TRAITS_H | ||||
|  | ||||
| namespace Eigen { | ||||
| namespace internal { | ||||
|  | ||||
|  | ||||
| template<typename Scalar, int Options> | ||||
| class compute_tensor_flags | ||||
| { | ||||
|   enum { | ||||
|     is_dynamic_size_storage = 1, | ||||
|  | ||||
|     is_aligned = | ||||
|     ( | ||||
|         ((Options&DontAlign)==0) && ( | ||||
| #if EIGEN_MAX_STATIC_ALIGN_BYTES>0 | ||||
|             (!is_dynamic_size_storage) | ||||
| #else | ||||
|             0 | ||||
| #endif | ||||
|             | | ||||
| #if EIGEN_MAX_ALIGN_BYTES>0 | ||||
|             is_dynamic_size_storage | ||||
| #else | ||||
|             0 | ||||
| #endif | ||||
|       ) | ||||
|      ), | ||||
|     packet_access_bit = packet_traits<Scalar>::Vectorizable && is_aligned ? PacketAccessBit : 0 | ||||
|   }; | ||||
|  | ||||
|   public: | ||||
|     enum { ret = packet_access_bit }; | ||||
| }; | ||||
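| // Illustrative reading of the flag computation above (an assumption about typical | ||||
| // builds): for a vectorizable Scalar such as float, the default Options (without | ||||
| // DontAlign) on a build with EIGEN_MAX_ALIGN_BYTES > 0 yields ret == PacketAccessBit, | ||||
| // whereas passing DontAlign (or disabling alignment at build time) yields ret == 0. | ||||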
|  | ||||
|  | ||||
| template<typename Scalar_, int NumIndices_, int Options_, typename IndexType_> | ||||
| struct traits<Tensor<Scalar_, NumIndices_, Options_, IndexType_> > | ||||
| { | ||||
|   typedef Scalar_ Scalar; | ||||
|   typedef Dense StorageKind; | ||||
|   typedef IndexType_ Index; | ||||
|   static const int NumDimensions = NumIndices_; | ||||
|   static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor; | ||||
|   enum { | ||||
|     Options = Options_, | ||||
|     Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0 : LvalueBit) | ||||
|   }; | ||||
|   template <typename T> struct MakePointer { | ||||
|     typedef T* Type; | ||||
|   }; | ||||
| }; | ||||
|  | ||||
|  | ||||
| template<typename Scalar_, typename Dimensions, int Options_, typename IndexType_> | ||||
| struct traits<TensorFixedSize<Scalar_, Dimensions, Options_, IndexType_> > | ||||
| { | ||||
|   typedef Scalar_ Scalar; | ||||
|   typedef Dense StorageKind; | ||||
|   typedef IndexType_ Index; | ||||
|   static const int NumDimensions = array_size<Dimensions>::value; | ||||
|   static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor; | ||||
|   enum { | ||||
|     Options = Options_, | ||||
|     Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0: LvalueBit) | ||||
|   }; | ||||
|   template <typename T> struct MakePointer { | ||||
|     typedef T* Type; | ||||
|   }; | ||||
| }; | ||||
|  | ||||
|  | ||||
| template<typename PlainObjectType, int Options_, template <class> class MakePointer_> | ||||
| struct traits<TensorMap<PlainObjectType, Options_, MakePointer_> > | ||||
|   : public traits<PlainObjectType> | ||||
| { | ||||
|   typedef traits<PlainObjectType> BaseTraits; | ||||
|   typedef typename BaseTraits::Scalar Scalar; | ||||
|   typedef typename BaseTraits::StorageKind StorageKind; | ||||
|   typedef typename BaseTraits::Index Index; | ||||
|   static const int NumDimensions = BaseTraits::NumDimensions; | ||||
|   static const int Layout = BaseTraits::Layout; | ||||
|   enum { | ||||
|     Options = Options_, | ||||
|     Flags = BaseTraits::Flags | ||||
|   }; | ||||
|   template <class T> struct MakePointer { | ||||
|     // Intermediate typedef to work around an MSVC issue. | ||||
|     typedef MakePointer_<T> MakePointerT; | ||||
|     typedef typename MakePointerT::Type Type; | ||||
|   }; | ||||
| }; | ||||
|  | ||||
| template<typename PlainObjectType> | ||||
| struct traits<TensorRef<PlainObjectType> > | ||||
|   : public traits<PlainObjectType> | ||||
| { | ||||
|   typedef traits<PlainObjectType> BaseTraits; | ||||
|   typedef typename BaseTraits::Scalar Scalar; | ||||
|   typedef typename BaseTraits::StorageKind StorageKind; | ||||
|   typedef typename BaseTraits::Index Index; | ||||
|   static const int NumDimensions = BaseTraits::NumDimensions; | ||||
|   static const int Layout = BaseTraits::Layout; | ||||
|   enum { | ||||
|     Options = BaseTraits::Options, | ||||
|     Flags = BaseTraits::Flags | ||||
|   }; | ||||
| }; | ||||
|  | ||||
|  | ||||
| template<typename _Scalar, int NumIndices_, int Options, typename IndexType_> | ||||
| struct eval<Tensor<_Scalar, NumIndices_, Options, IndexType_>, Eigen::Dense> | ||||
| { | ||||
|   typedef const Tensor<_Scalar, NumIndices_, Options, IndexType_>& type; | ||||
| }; | ||||
|  | ||||
| template<typename _Scalar, int NumIndices_, int Options, typename IndexType_> | ||||
| struct eval<const Tensor<_Scalar, NumIndices_, Options, IndexType_>, Eigen::Dense> | ||||
| { | ||||
|   typedef const Tensor<_Scalar, NumIndices_, Options, IndexType_>& type; | ||||
| }; | ||||
|  | ||||
| template<typename Scalar_, typename Dimensions, int Options, typename IndexType_> | ||||
| struct eval<TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>& type; | ||||
| }; | ||||
|  | ||||
| template<typename Scalar_, typename Dimensions, int Options, typename IndexType_> | ||||
| struct eval<const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>& type; | ||||
| }; | ||||
|  | ||||
| template<typename PlainObjectType, int Options, template <class> class MakePointer> | ||||
| struct eval<TensorMap<PlainObjectType, Options, MakePointer>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorMap<PlainObjectType, Options, MakePointer>& type; | ||||
| }; | ||||
|  | ||||
| template<typename PlainObjectType, int Options, template <class> class MakePointer> | ||||
| struct eval<const TensorMap<PlainObjectType, Options, MakePointer>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorMap<PlainObjectType, Options, MakePointer>& type; | ||||
| }; | ||||
|  | ||||
| template<typename PlainObjectType> | ||||
| struct eval<TensorRef<PlainObjectType>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorRef<PlainObjectType>& type; | ||||
| }; | ||||
|  | ||||
| template<typename PlainObjectType> | ||||
| struct eval<const TensorRef<PlainObjectType>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorRef<PlainObjectType>& type; | ||||
| }; | ||||
|  | ||||
| // TODO nested<> does not exist anymore in Eigen/Core, and it thus has to be removed in favor of ref_selector. | ||||
| template<typename T, int n=1, typename PlainObject = void> struct nested | ||||
| { | ||||
|   typedef typename ref_selector<T>::type type; | ||||
| }; | ||||
|  | ||||
| template <typename Scalar_, int NumIndices_, int Options_, typename IndexType_> | ||||
| struct nested<Tensor<Scalar_, NumIndices_, Options_, IndexType_> > | ||||
| { | ||||
|   typedef const Tensor<Scalar_, NumIndices_, Options_, IndexType_>& type; | ||||
| }; | ||||
|  | ||||
| template <typename Scalar_, int NumIndices_, int Options_, typename IndexType_> | ||||
| struct nested<const Tensor<Scalar_, NumIndices_, Options_, IndexType_> > | ||||
| { | ||||
|   typedef const Tensor<Scalar_, NumIndices_, Options_, IndexType_>& type; | ||||
| }; | ||||
|  | ||||
| template <typename Scalar_, typename Dimensions, int Options, typename IndexType_> | ||||
| struct nested<TensorFixedSize<Scalar_, Dimensions, Options, IndexType_> > | ||||
| { | ||||
|   typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>& type; | ||||
| }; | ||||
|  | ||||
| template <typename Scalar_, typename Dimensions, int Options, typename IndexType_> | ||||
| struct nested<const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_> > | ||||
| { | ||||
|   typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>& type; | ||||
| }; | ||||
|  | ||||
|  | ||||
| template <typename PlainObjectType, int Options, template <class> class MakePointer> | ||||
| struct nested<TensorMap<PlainObjectType, Options, MakePointer> > | ||||
| { | ||||
|   typedef const TensorMap<PlainObjectType, Options, MakePointer>& type; | ||||
| }; | ||||
|  | ||||
| template <typename PlainObjectType, int Options, template <class> class MakePointer> | ||||
| struct nested<const TensorMap<PlainObjectType, Options, MakePointer> > | ||||
| { | ||||
|   typedef const TensorMap<PlainObjectType, Options, MakePointer>& type; | ||||
| }; | ||||
|  | ||||
| template <typename PlainObjectType> | ||||
| struct nested<TensorRef<PlainObjectType> > | ||||
| { | ||||
|   typedef const TensorRef<PlainObjectType>& type; | ||||
| }; | ||||
|  | ||||
| template <typename PlainObjectType> | ||||
| struct nested<const TensorRef<PlainObjectType> > | ||||
| { | ||||
|   typedef const TensorRef<PlainObjectType>& type; | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
| // Convolutional layers take in an input tensor of shape (D, R, C, B), or (D, C, | ||||
| // R, B), and convolve it with a set of filters, which can also be presented as | ||||
| // a tensor (D, K, K, M), where M is the number of filters, K is the filter | ||||
| // size, and each 3-dimensional tensor of size (D, K, K) is a filter. For | ||||
| // simplicity we assume that we always use square filters (which is usually the | ||||
| // case in images), hence the two Ks in the tensor dimension.  The operation | ||||
| // also takes a few additional parameters: | ||||
| // Stride (S): The convolution stride is the offset between locations where we | ||||
| //             apply the filters.  A larger stride means that the output will be | ||||
| //             spatially smaller. | ||||
| // Padding (P): The padding we apply to the input tensor along the R and C | ||||
| //              dimensions.  This is usually used to make sure that the spatial | ||||
| //              dimensions of the output match our intention. | ||||
| // | ||||
| // Two types of padding are often used: | ||||
| //   SAME: the amount of padding is computed so that the output has spatial | ||||
| //         size ceil(R/S) and ceil(C/S). | ||||
| //   VALID: no padding is applied. | ||||
| // When padding is applied, the values at the padded locations are usually | ||||
| // zero. | ||||
| // | ||||
| // The output dimensions for convolution, when given all the parameters above, | ||||
| // are as follows: | ||||
| // When Padding = SAME: the output size is (B, R', C', M), where | ||||
| //   R' = ceil(float(R) / float(S)) | ||||
| //   C' = ceil(float(C) / float(S)) | ||||
| // where ceil is the ceiling function.  The input tensor is padded with 0 as | ||||
| // needed.  The number of padded rows and columns are computed as: | ||||
| //   Pr = ((R' - 1) * S + K - R) / 2 | ||||
| //   Pc = ((C' - 1) * S + K - C) / 2 | ||||
| // When the stride is 1, we have the simplified case R'=R, C'=C, Pr=Pc=(K-1)/2. | ||||
| // This is where the name SAME comes from: the output has the same spatial size | ||||
| // as the input. | ||||
| // When Padding = VALID: the output size is computed as | ||||
| //   R' = ceil(float(R - K + 1) / float(S)) | ||||
| //   C' = ceil(float(C - K + 1) / float(S)) | ||||
| // and the number of padded rows and columns are computed in the same way as in | ||||
| // the SAME case. | ||||
| // When the stride is 1, we have the simplified case R'=R-K+1, C'=C-K+1, Pr=0, | ||||
| // Pc=0. | ||||
| typedef enum { | ||||
|   PADDING_VALID = 1, | ||||
|   PADDING_SAME = 2 | ||||
| } PaddingType; | ||||
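|  | ||||
| // A minimal illustrative sketch (not part of Eigen): the helpers below spell | ||||
| // out the output-size and total-padding formulas described above for a single | ||||
| // spatial dimension.  The function names are hypothetical and exist only to | ||||
| // document the arithmetic; they assume stride > 0 and kernel <= input. | ||||
| inline int illustrative_conv_output_size(int input, int kernel, int stride, | ||||
|                                          PaddingType padding) { | ||||
|   if (padding == PADDING_SAME) { | ||||
|     return (input + stride - 1) / stride;       // R' = ceil(R / S) | ||||
|   } | ||||
|   return (input - kernel + stride) / stride;    // R' = ceil((R - K + 1) / S) | ||||
| } | ||||
|  | ||||
| inline int illustrative_conv_total_pad(int input, int kernel, int stride, | ||||
|                                        PaddingType padding) { | ||||
|   const int out = illustrative_conv_output_size(input, kernel, stride, padding); | ||||
|   const int pad = (out - 1) * stride + kernel - input;  // 2*Pr (or 2*Pc) | ||||
|   return pad > 0 ? pad : 0;  // VALID typically requires no padding | ||||
| } | ||||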
|  | ||||
| }  // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_TRAITS_H | ||||
248  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h  vendored  Normal file
							| @@ -0,0 +1,248 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_UINT128_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_UINT128_H | ||||
|  | ||||
| namespace Eigen { | ||||
| namespace internal { | ||||
|  | ||||
|  | ||||
| template <uint64_t n> | ||||
| struct static_val { | ||||
|   static const uint64_t value = n; | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE operator uint64_t() const { return n; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val() { } | ||||
|  | ||||
|   template <typename T> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val(const T& v) { | ||||
|     eigen_assert(v == n); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| template <typename HIGH = uint64_t, typename LOW = uint64_t> | ||||
| struct TensorUInt128 | ||||
| { | ||||
|   HIGH high; | ||||
|   LOW low; | ||||
|  | ||||
|   template<typename OTHER_HIGH, typename OTHER_LOW> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE | ||||
|   TensorUInt128(const TensorUInt128<OTHER_HIGH, OTHER_LOW>& other) : high(other.high), low(other.low) { | ||||
|     EIGEN_STATIC_ASSERT(sizeof(OTHER_HIGH) <= sizeof(HIGH), YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|     EIGEN_STATIC_ASSERT(sizeof(OTHER_LOW) <= sizeof(LOW), YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|   } | ||||
|  | ||||
|   template<typename OTHER_HIGH, typename OTHER_LOW> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE | ||||
|   TensorUInt128& operator = (const TensorUInt128<OTHER_HIGH, OTHER_LOW>& other) { | ||||
|     EIGEN_STATIC_ASSERT(sizeof(OTHER_HIGH) <= sizeof(HIGH), YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|     EIGEN_STATIC_ASSERT(sizeof(OTHER_LOW) <= sizeof(LOW), YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|     high = other.high; | ||||
|     low = other.low; | ||||
|     return *this; | ||||
|   } | ||||
|  | ||||
|   template<typename T> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE | ||||
|   explicit TensorUInt128(const T& x) : high(0), low(x) { | ||||
|     eigen_assert((static_cast<typename conditional<sizeof(T) == 8, uint64_t, uint32_t>::type>(x) <= NumTraits<uint64_t>::highest())); | ||||
|     eigen_assert(x >= 0); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE | ||||
|   TensorUInt128(HIGH y, LOW x) : high(y), low(x) { } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE operator LOW() const { | ||||
|     return low; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LOW lower() const { | ||||
|     return low; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HIGH upper() const { | ||||
|     return high; | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| template <typename HL, typename LL, typename HR, typename LR> | ||||
| EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE | ||||
| bool operator == (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) | ||||
| { | ||||
|   return (lhs.high == rhs.high) & (lhs.low == rhs.low); | ||||
| } | ||||
|  | ||||
| template <typename HL, typename LL, typename HR, typename LR> | ||||
| EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE | ||||
| bool operator != (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) | ||||
| { | ||||
|   return (lhs.high != rhs.high) | (lhs.low != rhs.low); | ||||
| } | ||||
|  | ||||
| template <typename HL, typename LL, typename HR, typename LR> | ||||
| EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE | ||||
| bool operator >= (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) | ||||
| { | ||||
|   if (lhs.high != rhs.high) { | ||||
|     return lhs.high > rhs.high; | ||||
|   } | ||||
|   return lhs.low >= rhs.low; | ||||
| } | ||||
|  | ||||
| template <typename HL, typename LL, typename HR, typename LR> | ||||
| EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE | ||||
| bool operator < (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) | ||||
| { | ||||
|   if (lhs.high != rhs.high) { | ||||
|     return lhs.high < rhs.high; | ||||
|   } | ||||
|   return lhs.low < rhs.low; | ||||
| } | ||||
|  | ||||
| template <typename HL, typename LL, typename HR, typename LR> | ||||
| EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE | ||||
| TensorUInt128<uint64_t, uint64_t> operator + (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) | ||||
| { | ||||
|   TensorUInt128<uint64_t, uint64_t> result(lhs.high + rhs.high, lhs.low + rhs.low); | ||||
|   if (result.low < rhs.low) { | ||||
|     result.high += 1; | ||||
|   } | ||||
|   return result; | ||||
| } | ||||
|  | ||||
| template <typename HL, typename LL, typename HR, typename LR> | ||||
| EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE | ||||
| TensorUInt128<uint64_t, uint64_t> operator - (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) | ||||
| { | ||||
|   TensorUInt128<uint64_t, uint64_t> result(lhs.high - rhs.high, lhs.low - rhs.low); | ||||
|   if (result.low > lhs.low) { | ||||
|     result.high -= 1; | ||||
|   } | ||||
|   return result; | ||||
| } | ||||
|  | ||||
|  | ||||
| template <typename HL, typename LL, typename HR, typename LR> | ||||
| static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
| TensorUInt128<uint64_t, uint64_t> operator * (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) | ||||
| { | ||||
|   // Split each 128-bit integer into 4 32-bit integers, and then do the | ||||
|   // multiplications by hand as follows: | ||||
|   //   lhs      a  b  c  d | ||||
|   //   rhs      e  f  g  h | ||||
|   //           ----------- | ||||
|   //           ah bh ch dh | ||||
|   //           bg cg dg | ||||
|   //           cf df | ||||
|   //           de | ||||
|   // The result is stored in two 64-bit integers, high and low. | ||||
|  | ||||
|   const uint64_t LOW = 0x00000000FFFFFFFFLL; | ||||
|   const uint64_t HIGH = 0xFFFFFFFF00000000LL; | ||||
|  | ||||
|   uint64_t d = lhs.low & LOW; | ||||
|   uint64_t c = (lhs.low & HIGH) >> 32LL; | ||||
|   uint64_t b = lhs.high & LOW; | ||||
|   uint64_t a = (lhs.high & HIGH) >> 32LL; | ||||
|  | ||||
|   uint64_t h = rhs.low & LOW; | ||||
|   uint64_t g = (rhs.low & HIGH) >> 32LL; | ||||
|   uint64_t f = rhs.high & LOW; | ||||
|   uint64_t e = (rhs.high & HIGH) >> 32LL; | ||||
|  | ||||
|   // Compute the low 32 bits of low | ||||
|   uint64_t acc = d * h; | ||||
|   uint64_t low = acc & LOW; | ||||
|   //  Compute the high 32 bits of low. Add a carry every time we wrap around | ||||
|   acc >>= 32LL; | ||||
|   uint64_t carry = 0; | ||||
|   uint64_t acc2 = acc + c * h; | ||||
|   if (acc2 < acc) { | ||||
|     carry++; | ||||
|   } | ||||
|   acc = acc2 + d * g; | ||||
|   if (acc < acc2) { | ||||
|     carry++; | ||||
|   } | ||||
|   low |= (acc << 32LL); | ||||
|  | ||||
|   // Carry forward the high bits of acc to initiate the computation of the | ||||
|   // low 32 bits of high | ||||
|   acc2 = (acc >> 32LL) | (carry << 32LL); | ||||
|   carry = 0; | ||||
|  | ||||
|   acc = acc2 + b * h; | ||||
|   if (acc < acc2) { | ||||
|     carry++; | ||||
|   } | ||||
|   acc2 = acc + c * g; | ||||
|   if (acc2 < acc) { | ||||
|     carry++; | ||||
|   } | ||||
|   acc = acc2 + d * f; | ||||
|   if (acc < acc2) { | ||||
|     carry++; | ||||
|   } | ||||
|   uint64_t high = acc & LOW; | ||||
|  | ||||
|   // Start to compute the high 32 bits of high. | ||||
|   acc2 = (acc >> 32LL) | (carry << 32LL); | ||||
|  | ||||
|   acc = acc2 + a * h; | ||||
|   acc2 = acc + b * g; | ||||
|   acc = acc2 + c * f; | ||||
|   acc2 = acc + d * e; | ||||
|   high |= (acc2 << 32LL); | ||||
|  | ||||
|   return TensorUInt128<uint64_t, uint64_t>(high, low); | ||||
| } | ||||
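|  | ||||
| // Illustrative usage sketch (not part of Eigen): the product of two 64-bit | ||||
| // operands comes back as a full 128-bit value split into upper()/lower() | ||||
| // halves.  The function name below is hypothetical and serves only as an | ||||
| // inline, compilable example of the operators defined in this file. | ||||
| inline void tensor_uint128_multiply_example() { | ||||
|   TensorUInt128<uint64_t, uint64_t> a(0, 0xFFFFFFFFFFFFFFFFull);  // 2^64 - 1 | ||||
|   TensorUInt128<uint64_t, uint64_t> b(0, 2); | ||||
|   TensorUInt128<uint64_t, uint64_t> p = a * b;                    // 2^65 - 2 | ||||
|   eigen_assert(p.upper() == 1); | ||||
|   eigen_assert(p.lower() == 0xFFFFFFFFFFFFFFFEull); | ||||
|   EIGEN_UNUSED_VARIABLE(p); | ||||
| } | ||||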
|  | ||||
| template <typename HL, typename LL, typename HR, typename LR> | ||||
| static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
| TensorUInt128<uint64_t, uint64_t> operator / (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) | ||||
| { | ||||
|   if (rhs == TensorUInt128<static_val<0>, static_val<1> >(1)) { | ||||
|     return TensorUInt128<uint64_t, uint64_t>(lhs.high, lhs.low); | ||||
|   } else if (lhs < rhs) { | ||||
|     return TensorUInt128<uint64_t, uint64_t>(0); | ||||
|   } else { | ||||
|     // calculate the biggest power of 2 times rhs that's less than or equal to lhs | ||||
|     TensorUInt128<uint64_t, uint64_t> power2(1); | ||||
|     TensorUInt128<uint64_t, uint64_t> d(rhs); | ||||
|     TensorUInt128<uint64_t, uint64_t> tmp(lhs - d); | ||||
|     while (lhs >= d) { | ||||
|       tmp = tmp - d; | ||||
|       d = d + d; | ||||
|       power2 = power2 + power2; | ||||
|     } | ||||
|  | ||||
|     tmp = TensorUInt128<uint64_t, uint64_t>(lhs.high, lhs.low); | ||||
|     TensorUInt128<uint64_t, uint64_t> result(0); | ||||
|     while (power2 != TensorUInt128<static_val<0>, static_val<0> >(0)) { | ||||
|       if (tmp >= d) { | ||||
|         tmp = tmp - d; | ||||
|         result = result + power2; | ||||
|       } | ||||
|       // Shift right | ||||
|       power2 = TensorUInt128<uint64_t, uint64_t>(power2.high >> 1, (power2.low >> 1) | (power2.high << 63)); | ||||
|       d = TensorUInt128<uint64_t, uint64_t>(d.high >> 1, (d.low >> 1) | (d.high << 63)); | ||||
|     } | ||||
|  | ||||
|     return result; | ||||
|   } | ||||
| } | ||||
|  | ||||
|  | ||||
| }  // namespace internal | ||||
| }  // namespace Eigen | ||||
|  | ||||
|  | ||||
| #endif  // EIGEN_CXX11_TENSOR_TENSOR_UINT128_H | ||||
608  external/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h  vendored  Normal file
							| @@ -0,0 +1,608 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H | ||||
| #define EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \class TensorVolumePatch | ||||
|   * \ingroup CXX11_Tensor_Module | ||||
|   * | ||||
|   * \brief Patch extraction specialized for processing of volumetric data. | ||||
|   * This assumes that the input has at least 4 dimensions ordered as follows: | ||||
|   *  - channels | ||||
|   *  - planes | ||||
|   *  - rows | ||||
|   *  - columns | ||||
|   *  - (optional) additional dimensions such as time or batch size. | ||||
|   * Calling the volume patch code with patch_planes, patch_rows, and patch_cols | ||||
|   * is equivalent to calling the regular patch extraction code with parameters | ||||
|   * d, patch_planes, patch_rows, patch_cols, and 1 for all the additional | ||||
|   * dimensions. | ||||
|   */ | ||||
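|  | ||||
| /** Illustrative usage sketch (an assumption: the op is normally created via | ||||
|   * TensorBase::extract_volume_patches, which is declared elsewhere in the | ||||
|   * Tensor module; shapes are shown for the ColMajor layout): | ||||
|   * \code | ||||
|   * Eigen::Tensor<float, 5> input(depth, planes, rows, cols, batch); | ||||
|   * // Extract 2x3x5 patches; the result gains one dimension that indexes the | ||||
|   * // patches extracted from each volume. | ||||
|   * Eigen::Tensor<float, 6> patches = input.extract_volume_patches(2, 3, 5); | ||||
|   * \endcode | ||||
|   */ | ||||
|  | ||||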
| namespace internal { | ||||
| template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType> | ||||
| struct traits<TensorVolumePatchOp<Planes, Rows, Cols, XprType> > : public traits<XprType> | ||||
| { | ||||
|   typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar; | ||||
|   typedef traits<XprType> XprTraits; | ||||
|   typedef typename XprTraits::StorageKind StorageKind; | ||||
|   typedef typename XprTraits::Index Index; | ||||
|   typedef typename XprType::Nested Nested; | ||||
|   typedef typename remove_reference<Nested>::type _Nested; | ||||
|   static const int NumDimensions = XprTraits::NumDimensions + 1; | ||||
|   static const int Layout = XprTraits::Layout; | ||||
| }; | ||||
|  | ||||
| template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType> | ||||
| struct eval<TensorVolumePatchOp<Planes, Rows, Cols, XprType>, Eigen::Dense> | ||||
| { | ||||
|   typedef const TensorVolumePatchOp<Planes, Rows, Cols, XprType>& type; | ||||
| }; | ||||
|  | ||||
| template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType> | ||||
| struct nested<TensorVolumePatchOp<Planes, Rows, Cols, XprType>, 1, typename eval<TensorVolumePatchOp<Planes, Rows, Cols, XprType> >::type> | ||||
| { | ||||
|   typedef TensorVolumePatchOp<Planes, Rows, Cols, XprType> type; | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
| template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType> | ||||
| class TensorVolumePatchOp : public TensorBase<TensorVolumePatchOp<Planes, Rows, Cols, XprType>, ReadOnlyAccessors> | ||||
| { | ||||
|   public: | ||||
|   typedef typename Eigen::internal::traits<TensorVolumePatchOp>::Scalar Scalar; | ||||
|   typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename Eigen::internal::nested<TensorVolumePatchOp>::type Nested; | ||||
|   typedef typename Eigen::internal::traits<TensorVolumePatchOp>::StorageKind StorageKind; | ||||
|   typedef typename Eigen::internal::traits<TensorVolumePatchOp>::Index Index; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorVolumePatchOp(const XprType& expr, DenseIndex patch_planes, DenseIndex patch_rows, DenseIndex patch_cols, | ||||
|                                                             DenseIndex plane_strides, DenseIndex row_strides, DenseIndex col_strides, | ||||
|                                                             DenseIndex in_plane_strides, DenseIndex in_row_strides, DenseIndex in_col_strides, | ||||
|                                                             DenseIndex plane_inflate_strides, DenseIndex row_inflate_strides, DenseIndex col_inflate_strides, | ||||
|                                                             PaddingType padding_type, Scalar padding_value) | ||||
|       : m_xpr(expr), m_patch_planes(patch_planes), m_patch_rows(patch_rows), m_patch_cols(patch_cols), | ||||
|         m_plane_strides(plane_strides), m_row_strides(row_strides), m_col_strides(col_strides), | ||||
|         m_in_plane_strides(in_plane_strides), m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides), | ||||
|         m_plane_inflate_strides(plane_inflate_strides), m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides), | ||||
|         m_padding_explicit(false), m_padding_top_z(0), m_padding_bottom_z(0), m_padding_top(0), m_padding_bottom(0), m_padding_left(0), m_padding_right(0), | ||||
|         m_padding_type(padding_type), m_padding_value(padding_value) {} | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorVolumePatchOp(const XprType& expr, DenseIndex patch_planes, DenseIndex patch_rows, DenseIndex patch_cols, | ||||
|                                                            DenseIndex plane_strides, DenseIndex row_strides, DenseIndex col_strides, | ||||
|                                                            DenseIndex in_plane_strides, DenseIndex in_row_strides, DenseIndex in_col_strides, | ||||
|                                                            DenseIndex plane_inflate_strides, DenseIndex row_inflate_strides, DenseIndex col_inflate_strides, | ||||
|                                                            DenseIndex padding_top_z, DenseIndex padding_bottom_z, | ||||
|                                                            DenseIndex padding_top, DenseIndex padding_bottom, | ||||
|                                                            DenseIndex padding_left, DenseIndex padding_right, | ||||
|                                                            Scalar padding_value) | ||||
|       : m_xpr(expr), m_patch_planes(patch_planes), m_patch_rows(patch_rows), m_patch_cols(patch_cols), | ||||
|         m_plane_strides(plane_strides), m_row_strides(row_strides), m_col_strides(col_strides), | ||||
|         m_in_plane_strides(in_plane_strides), m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides), | ||||
|         m_plane_inflate_strides(plane_inflate_strides), m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides), | ||||
|         m_padding_explicit(true), m_padding_top_z(padding_top_z), m_padding_bottom_z(padding_bottom_z), m_padding_top(padding_top), m_padding_bottom(padding_bottom), | ||||
|         m_padding_left(padding_left), m_padding_right(padding_right), | ||||
|         m_padding_type(PADDING_VALID), m_padding_value(padding_value) {} | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     DenseIndex patch_planes() const { return m_patch_planes; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     DenseIndex patch_rows() const { return m_patch_rows; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     DenseIndex patch_cols() const { return m_patch_cols; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     DenseIndex plane_strides() const { return m_plane_strides; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     DenseIndex row_strides() const { return m_row_strides; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     DenseIndex col_strides() const { return m_col_strides; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     DenseIndex in_plane_strides() const { return m_in_plane_strides; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     DenseIndex in_row_strides() const { return m_in_row_strides; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     DenseIndex in_col_strides() const { return m_in_col_strides; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     DenseIndex plane_inflate_strides() const { return m_plane_inflate_strides; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     DenseIndex row_inflate_strides() const { return m_row_inflate_strides; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     DenseIndex col_inflate_strides() const { return m_col_inflate_strides; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     bool padding_explicit() const { return m_padding_explicit; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     DenseIndex padding_top_z() const { return m_padding_top_z; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     DenseIndex padding_bottom_z() const { return m_padding_bottom_z; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     DenseIndex padding_top() const { return m_padding_top; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     DenseIndex padding_bottom() const { return m_padding_bottom; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     DenseIndex padding_left() const { return m_padding_left; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     DenseIndex padding_right() const { return m_padding_right; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     PaddingType padding_type() const { return m_padding_type; } | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     Scalar padding_value() const { return m_padding_value; } | ||||
|  | ||||
|     EIGEN_DEVICE_FUNC | ||||
|     const typename internal::remove_all<typename XprType::Nested>::type& | ||||
|     expression() const { return m_xpr; } | ||||
|  | ||||
|   protected: | ||||
|     typename XprType::Nested m_xpr; | ||||
|     const DenseIndex m_patch_planes; | ||||
|     const DenseIndex m_patch_rows; | ||||
|     const DenseIndex m_patch_cols; | ||||
|     const DenseIndex m_plane_strides; | ||||
|     const DenseIndex m_row_strides; | ||||
|     const DenseIndex m_col_strides; | ||||
|     const DenseIndex m_in_plane_strides; | ||||
|     const DenseIndex m_in_row_strides; | ||||
|     const DenseIndex m_in_col_strides; | ||||
|     const DenseIndex m_plane_inflate_strides; | ||||
|     const DenseIndex m_row_inflate_strides; | ||||
|     const DenseIndex m_col_inflate_strides; | ||||
|     const bool m_padding_explicit; | ||||
|     const DenseIndex m_padding_top_z; | ||||
|     const DenseIndex m_padding_bottom_z; | ||||
|     const DenseIndex m_padding_top; | ||||
|     const DenseIndex m_padding_bottom; | ||||
|     const DenseIndex m_padding_left; | ||||
|     const DenseIndex m_padding_right; | ||||
|     const PaddingType m_padding_type; | ||||
|     const Scalar m_padding_value; | ||||
| }; | ||||
|  | ||||
|  | ||||
| // Eval as rvalue | ||||
| template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename ArgType, typename Device> | ||||
| struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, Device> | ||||
| { | ||||
|   typedef TensorVolumePatchOp<Planes, Rows, Cols, ArgType> XprType; | ||||
|   typedef typename XprType::Index Index; | ||||
|   static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; | ||||
|   static const int NumDims = NumInputDims + 1; | ||||
|   typedef DSizes<Index, NumDims> Dimensions; | ||||
|   typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar; | ||||
|   typedef typename XprType::CoeffReturnType CoeffReturnType; | ||||
|   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; | ||||
|   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; | ||||
|  | ||||
|   enum { | ||||
|     IsAligned = false, | ||||
|     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, | ||||
|     BlockAccess = false, | ||||
|     Layout = TensorEvaluator<ArgType, Device>::Layout, | ||||
|     CoordAccess = false, | ||||
|     RawAccess = false | ||||
|   }; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) | ||||
|       : m_impl(op.expression(), device) | ||||
|   { | ||||
|     EIGEN_STATIC_ASSERT((NumDims >= 5), YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|  | ||||
|     m_paddingValue = op.padding_value(); | ||||
|  | ||||
|     const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); | ||||
|  | ||||
|     // Cache a few variables. | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       m_inputDepth = input_dims[0]; | ||||
|       m_inputPlanes = input_dims[1]; | ||||
|       m_inputRows = input_dims[2]; | ||||
|       m_inputCols = input_dims[3]; | ||||
|     } else { | ||||
|       m_inputDepth = input_dims[NumInputDims-1]; | ||||
|       m_inputPlanes = input_dims[NumInputDims-2]; | ||||
|       m_inputRows = input_dims[NumInputDims-3]; | ||||
|       m_inputCols = input_dims[NumInputDims-4]; | ||||
|     } | ||||
|  | ||||
|     m_plane_strides = op.plane_strides(); | ||||
|     m_row_strides = op.row_strides(); | ||||
|     m_col_strides = op.col_strides(); | ||||
|  | ||||
|     // Input strides and effective input/patch size | ||||
|     m_in_plane_strides = op.in_plane_strides(); | ||||
|     m_in_row_strides = op.in_row_strides(); | ||||
|     m_in_col_strides = op.in_col_strides(); | ||||
|     m_plane_inflate_strides = op.plane_inflate_strides(); | ||||
|     m_row_inflate_strides = op.row_inflate_strides(); | ||||
|     m_col_inflate_strides = op.col_inflate_strides(); | ||||
|  | ||||
|     // The "effective" spatial size after inflating data with zeros. | ||||
|     m_input_planes_eff = (m_inputPlanes - 1) * m_plane_inflate_strides + 1; | ||||
|     m_input_rows_eff = (m_inputRows - 1) * m_row_inflate_strides + 1; | ||||
|     m_input_cols_eff = (m_inputCols - 1) * m_col_inflate_strides + 1; | ||||
|     m_patch_planes_eff = op.patch_planes() + (op.patch_planes() - 1) * (m_in_plane_strides - 1); | ||||
|     m_patch_rows_eff = op.patch_rows() + (op.patch_rows() - 1) * (m_in_row_strides - 1); | ||||
|     m_patch_cols_eff = op.patch_cols() + (op.patch_cols() - 1) * (m_in_col_strides - 1); | ||||
|  | ||||
|     if (op.padding_explicit()) { | ||||
|       m_outputPlanes = numext::ceil((m_input_planes_eff + op.padding_top_z() + op.padding_bottom_z() - m_patch_planes_eff + 1.f) / static_cast<float>(m_plane_strides)); | ||||
|       m_outputRows = numext::ceil((m_input_rows_eff + op.padding_top() + op.padding_bottom() - m_patch_rows_eff + 1.f) / static_cast<float>(m_row_strides)); | ||||
|       m_outputCols = numext::ceil((m_input_cols_eff + op.padding_left() + op.padding_right() - m_patch_cols_eff + 1.f) / static_cast<float>(m_col_strides)); | ||||
|       m_planePaddingTop = op.padding_top_z(); | ||||
|       m_rowPaddingTop = op.padding_top(); | ||||
|       m_colPaddingLeft = op.padding_left(); | ||||
|     } else { | ||||
|       // Computing padding from the type | ||||
|       switch (op.padding_type()) { | ||||
|         case PADDING_VALID: | ||||
|           m_outputPlanes = numext::ceil((m_input_planes_eff - m_patch_planes_eff + 1.f) / static_cast<float>(m_plane_strides)); | ||||
|           m_outputRows = numext::ceil((m_input_rows_eff - m_patch_rows_eff + 1.f) / static_cast<float>(m_row_strides)); | ||||
|           m_outputCols = numext::ceil((m_input_cols_eff - m_patch_cols_eff + 1.f) / static_cast<float>(m_col_strides)); | ||||
|           m_planePaddingTop = 0; | ||||
|           m_rowPaddingTop = 0; | ||||
|           m_colPaddingLeft = 0; | ||||
|           break; | ||||
|         case PADDING_SAME: { | ||||
|           m_outputPlanes = numext::ceil(m_input_planes_eff / static_cast<float>(m_plane_strides)); | ||||
|           m_outputRows = numext::ceil(m_input_rows_eff / static_cast<float>(m_row_strides)); | ||||
|           m_outputCols = numext::ceil(m_input_cols_eff / static_cast<float>(m_col_strides)); | ||||
|           const Index dz = m_outputPlanes * m_plane_strides + m_patch_planes_eff - 1 - m_input_planes_eff; | ||||
|           const Index dy = m_outputRows * m_row_strides + m_patch_rows_eff - 1 - m_input_rows_eff; | ||||
|           const Index dx = m_outputCols * m_col_strides + m_patch_cols_eff - 1 - m_input_cols_eff; | ||||
|           m_planePaddingTop = dz - dz / 2; | ||||
|           m_rowPaddingTop = dy - dy / 2; | ||||
|           m_colPaddingLeft = dx - dx / 2; | ||||
|           break; | ||||
|         } | ||||
|         default: | ||||
|           eigen_assert(false && "unexpected padding"); | ||||
|       } | ||||
|     } | ||||
|     eigen_assert(m_outputRows > 0); | ||||
|     eigen_assert(m_outputCols > 0); | ||||
|     eigen_assert(m_outputPlanes > 0); | ||||
|  | ||||
|     // Dimensions for result of extraction. | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       // ColMajor | ||||
|       // 0: depth | ||||
|       // 1: patch_planes | ||||
|       // 2: patch_rows | ||||
|       // 3: patch_cols | ||||
|       // 4: number of patches | ||||
|       // 5 and beyond: anything else (such as batch). | ||||
|       m_dimensions[0] = input_dims[0]; | ||||
|       m_dimensions[1] = op.patch_planes(); | ||||
|       m_dimensions[2] = op.patch_rows(); | ||||
|       m_dimensions[3] = op.patch_cols(); | ||||
|       m_dimensions[4] = m_outputPlanes * m_outputRows * m_outputCols; | ||||
|       for (int i = 5; i < NumDims; ++i) { | ||||
|         m_dimensions[i] = input_dims[i-1]; | ||||
|       } | ||||
|     } else { | ||||
|       // RowMajor | ||||
|       // NumDims-1: depth | ||||
|       // NumDims-2: patch_planes | ||||
|       // NumDims-3: patch_rows | ||||
|       // NumDims-4: patch_cols | ||||
|       // NumDims-5: number of patches | ||||
|       // NumDims-6 and beyond: anything else (such as batch). | ||||
|       m_dimensions[NumDims-1] = input_dims[NumInputDims-1]; | ||||
|       m_dimensions[NumDims-2] = op.patch_planes(); | ||||
|       m_dimensions[NumDims-3] = op.patch_rows(); | ||||
|       m_dimensions[NumDims-4] = op.patch_cols(); | ||||
|       m_dimensions[NumDims-5] = m_outputPlanes * m_outputRows * m_outputCols; | ||||
|       for (int i = NumDims-6; i >= 0; --i) { | ||||
|         m_dimensions[i] = input_dims[i]; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     // Strides for the output tensor. | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       m_rowStride = m_dimensions[1]; | ||||
|       m_colStride = m_dimensions[2] * m_rowStride; | ||||
|       m_patchStride = m_colStride * m_dimensions[3] * m_dimensions[0]; | ||||
|       m_otherStride = m_patchStride * m_dimensions[4]; | ||||
|     } else { | ||||
|       m_rowStride = m_dimensions[NumDims-2]; | ||||
|       m_colStride = m_dimensions[NumDims-3] * m_rowStride; | ||||
|       m_patchStride = m_colStride * m_dimensions[NumDims-4] * m_dimensions[NumDims-1]; | ||||
|       m_otherStride = m_patchStride * m_dimensions[NumDims-5]; | ||||
|     } | ||||
|  | ||||
|     // Strides for navigating through the input tensor. | ||||
|     m_planeInputStride = m_inputDepth; | ||||
|     m_rowInputStride = m_inputDepth * m_inputPlanes; | ||||
|     m_colInputStride = m_inputDepth * m_inputRows * m_inputPlanes; | ||||
|     m_otherInputStride = m_inputDepth * m_inputRows * m_inputCols * m_inputPlanes; | ||||
|  | ||||
|     m_outputPlanesRows = m_outputPlanes * m_outputRows; | ||||
|  | ||||
|     // Fast representations of different variables. | ||||
|     m_fastOtherStride = internal::TensorIntDivisor<Index>(m_otherStride); | ||||
|     m_fastPatchStride = internal::TensorIntDivisor<Index>(m_patchStride); | ||||
|     m_fastColStride = internal::TensorIntDivisor<Index>(m_colStride); | ||||
|     m_fastRowStride = internal::TensorIntDivisor<Index>(m_rowStride); | ||||
|     m_fastInputRowStride = internal::TensorIntDivisor<Index>(m_row_inflate_strides); | ||||
|     m_fastInputColStride = internal::TensorIntDivisor<Index>(m_col_inflate_strides); | ||||
|     m_fastInputPlaneStride = internal::TensorIntDivisor<Index>(m_plane_inflate_strides); | ||||
|     m_fastInputColsEff = internal::TensorIntDivisor<Index>(m_input_cols_eff); | ||||
|     m_fastOutputPlanes = internal::TensorIntDivisor<Index>(m_outputPlanes); | ||||
|     m_fastOutputPlanesRows = internal::TensorIntDivisor<Index>(m_outputPlanesRows); | ||||
|  | ||||
|     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { | ||||
|       m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[0]); | ||||
|     } else { | ||||
|       m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[NumDims-1]); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { | ||||
|     m_impl.evalSubExprsIfNeeded(NULL); | ||||
|     return true; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { | ||||
|     m_impl.cleanup(); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const | ||||
|   { | ||||
|     // Patch index corresponding to the passed in index. | ||||
|     const Index patchIndex = index / m_fastPatchStride; | ||||
|  | ||||
|     // Spatial offset within the patch. This has to be translated into 3D | ||||
|     // coordinates within the patch. | ||||
|     const Index patchOffset = (index - patchIndex * m_patchStride) / m_fastOutputDepth; | ||||
|  | ||||
|     // Batch, etc. | ||||
|     const Index otherIndex = (NumDims == 5) ? 0 : index / m_fastOtherStride; | ||||
|     const Index patch3DIndex = (NumDims == 5) ? patchIndex : (index - otherIndex * m_otherStride) / m_fastPatchStride; | ||||
|  | ||||
|     // Calculate column index in the input original tensor. | ||||
|     const Index colIndex = patch3DIndex / m_fastOutputPlanesRows; | ||||
|     const Index colOffset = patchOffset / m_fastColStride; | ||||
|     const Index inputCol = colIndex * m_col_strides + colOffset * m_in_col_strides - m_colPaddingLeft; | ||||
|     const Index origInputCol = (m_col_inflate_strides == 1) ? inputCol : ((inputCol >= 0) ? (inputCol / m_fastInputColStride) : 0); | ||||
|     if (inputCol < 0 || inputCol >= m_input_cols_eff || | ||||
|         ((m_col_inflate_strides != 1) && (inputCol != origInputCol * m_col_inflate_strides))) { | ||||
|       return Scalar(m_paddingValue); | ||||
|     } | ||||
|  | ||||
|     // Calculate row index in the original input tensor. | ||||
|     const Index rowIndex = (patch3DIndex - colIndex * m_outputPlanesRows) / m_fastOutputPlanes; | ||||
|     const Index rowOffset = (patchOffset - colOffset * m_colStride) / m_fastRowStride; | ||||
|     const Index inputRow = rowIndex * m_row_strides + rowOffset * m_in_row_strides - m_rowPaddingTop; | ||||
|     const Index origInputRow = (m_row_inflate_strides == 1) ? inputRow : ((inputRow >= 0) ? (inputRow / m_fastInputRowStride) : 0); | ||||
|     if (inputRow < 0 || inputRow >= m_input_rows_eff || | ||||
|         ((m_row_inflate_strides != 1) && (inputRow != origInputRow * m_row_inflate_strides))) { | ||||
|       return Scalar(m_paddingValue); | ||||
|     } | ||||
|  | ||||
|     // Calculate plane index in the original input tensor. | ||||
|     const Index planeIndex = (patch3DIndex - m_outputPlanes * (colIndex * m_outputRows + rowIndex)); | ||||
|     const Index planeOffset = patchOffset - colOffset * m_colStride - rowOffset * m_rowStride; | ||||
|     const Index inputPlane = planeIndex * m_plane_strides + planeOffset * m_in_plane_strides - m_planePaddingTop; | ||||
|     const Index origInputPlane = (m_plane_inflate_strides == 1) ? inputPlane : ((inputPlane >= 0) ? (inputPlane / m_fastInputPlaneStride) : 0); | ||||
|     if (inputPlane < 0 || inputPlane >= m_input_planes_eff || | ||||
|         ((m_plane_inflate_strides != 1) && (inputPlane != origInputPlane * m_plane_inflate_strides))) { | ||||
|       return Scalar(m_paddingValue); | ||||
|     } | ||||
|  | ||||
|     const int depth_index = static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : NumDims - 1; | ||||
|     const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index]; | ||||
|  | ||||
|     const Index inputIndex = depth + | ||||
|         origInputRow * m_rowInputStride + | ||||
|         origInputCol * m_colInputStride + | ||||
|         origInputPlane * m_planeInputStride + | ||||
|         otherIndex * m_otherInputStride; | ||||
|  | ||||
|     return m_impl.coeff(inputIndex); | ||||
|   } | ||||
|  | ||||
|   template<int LoadMode> | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const | ||||
|   { | ||||
|     EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); | ||||
|  | ||||
|     if (m_in_row_strides != 1 || m_in_col_strides != 1 || m_row_inflate_strides != 1 || m_col_inflate_strides != 1 || | ||||
|         m_in_plane_strides != 1 || m_plane_inflate_strides != 1) { | ||||
|       return packetWithPossibleZero(index); | ||||
|     } | ||||
|  | ||||
|     const Index indices[2] = {index, index + PacketSize - 1}; | ||||
|     const Index patchIndex = indices[0] / m_fastPatchStride; | ||||
|     if (patchIndex != indices[1] / m_fastPatchStride) { | ||||
|       return packetWithPossibleZero(index); | ||||
|     } | ||||
|     const Index otherIndex = (NumDims == 5) ? 0 : indices[0] / m_fastOtherStride; | ||||
|     eigen_assert(otherIndex == indices[1] / m_fastOtherStride); | ||||
|  | ||||
|     // Find the offset of the element wrt the location of the first element. | ||||
|     const Index patchOffsets[2] = {(indices[0] - patchIndex * m_patchStride) / m_fastOutputDepth, | ||||
|                                    (indices[1] - patchIndex * m_patchStride) / m_fastOutputDepth}; | ||||
|  | ||||
|     const Index patch3DIndex = (NumDims == 5) ? patchIndex : (indices[0] - otherIndex * m_otherStride) / m_fastPatchStride; | ||||
|     eigen_assert(patch3DIndex == (indices[1] - otherIndex * m_otherStride) / m_fastPatchStride); | ||||
|  | ||||
|     const Index colIndex = patch3DIndex / m_fastOutputPlanesRows; | ||||
|     const Index colOffsets[2] = { | ||||
|       patchOffsets[0] / m_fastColStride, | ||||
|       patchOffsets[1] / m_fastColStride}; | ||||
|  | ||||
|     // Calculate col indices in the original input tensor. | ||||
|     const Index inputCols[2] = { | ||||
|       colIndex * m_col_strides + colOffsets[0] - m_colPaddingLeft, | ||||
|       colIndex * m_col_strides + colOffsets[1] - m_colPaddingLeft}; | ||||
|     if (inputCols[1] < 0 || inputCols[0] >= m_inputCols) { | ||||
|       return internal::pset1<PacketReturnType>(Scalar(m_paddingValue)); | ||||
|     } | ||||
|  | ||||
|     if (inputCols[0] != inputCols[1]) { | ||||
|       return packetWithPossibleZero(index); | ||||
|     } | ||||
|  | ||||
|     const Index rowIndex = (patch3DIndex - colIndex * m_outputPlanesRows) / m_fastOutputPlanes; | ||||
|     const Index rowOffsets[2] = { | ||||
|       (patchOffsets[0] - colOffsets[0] * m_colStride) / m_fastRowStride, | ||||
|       (patchOffsets[1] - colOffsets[1] * m_colStride) / m_fastRowStride}; | ||||
|     eigen_assert(rowOffsets[0] <= rowOffsets[1]); | ||||
|     // Calculate row indices in the original input tensor. | ||||
|     const Index inputRows[2] = { | ||||
|       rowIndex * m_row_strides + rowOffsets[0] - m_rowPaddingTop, | ||||
|       rowIndex * m_row_strides + rowOffsets[1] - m_rowPaddingTop}; | ||||
|  | ||||
|     if (inputRows[1] < 0 || inputRows[0] >= m_inputRows) { | ||||
|       return internal::pset1<PacketReturnType>(Scalar(m_paddingValue)); | ||||
|     } | ||||
|  | ||||
|     if (inputRows[0] != inputRows[1]) { | ||||
|       return packetWithPossibleZero(index); | ||||
|     } | ||||
|  | ||||
|     const Index planeIndex = (patch3DIndex - m_outputPlanes * (colIndex * m_outputRows + rowIndex)); | ||||
|     const Index planeOffsets[2] = { | ||||
|       patchOffsets[0] - colOffsets[0] * m_colStride - rowOffsets[0] * m_rowStride, | ||||
|       patchOffsets[1] - colOffsets[1] * m_colStride - rowOffsets[1] * m_rowStride}; | ||||
|     eigen_assert(planeOffsets[0] <= planeOffsets[1]); | ||||
|     const Index inputPlanes[2] = { | ||||
|       planeIndex * m_plane_strides + planeOffsets[0] - m_planePaddingTop, | ||||
|       planeIndex * m_plane_strides + planeOffsets[1] - m_planePaddingTop}; | ||||
|  | ||||
|     if (inputPlanes[1] < 0 || inputPlanes[0] >= m_inputPlanes) { | ||||
|       return internal::pset1<PacketReturnType>(Scalar(m_paddingValue)); | ||||
|     } | ||||
|  | ||||
|     if (inputPlanes[0] >= 0 && inputPlanes[1] < m_inputPlanes) { | ||||
|       // no padding | ||||
|       const int depth_index = static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : NumDims - 1; | ||||
|       const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index]; | ||||
|       const Index inputIndex = depth + | ||||
|           inputRows[0] * m_rowInputStride + | ||||
|           inputCols[0] * m_colInputStride + | ||||
|           m_planeInputStride * inputPlanes[0] + | ||||
|           otherIndex * m_otherInputStride; | ||||
|       return m_impl.template packet<Unaligned>(inputIndex); | ||||
|     } | ||||
|  | ||||
|     return packetWithPossibleZero(index); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost | ||||
|   costPerCoeff(bool vectorized) const { | ||||
|     const double compute_cost = | ||||
|         10 * TensorOpCost::DivCost<Index>() + 21 * TensorOpCost::MulCost<Index>() + | ||||
|         8 * TensorOpCost::AddCost<Index>(); | ||||
|     return TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } | ||||
|  | ||||
|   const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; } | ||||
|  | ||||
|   Index planePaddingTop() const { return m_planePaddingTop; } | ||||
|   Index rowPaddingTop() const { return m_rowPaddingTop; } | ||||
|   Index colPaddingLeft() const { return m_colPaddingLeft; } | ||||
|   Index outputPlanes() const { return m_outputPlanes; } | ||||
|   Index outputRows() const { return m_outputRows; } | ||||
|   Index outputCols() const { return m_outputCols; } | ||||
|   Index userPlaneStride() const { return m_plane_strides; } | ||||
|   Index userRowStride() const { return m_row_strides; } | ||||
|   Index userColStride() const { return m_col_strides; } | ||||
|   Index userInPlaneStride() const { return m_in_plane_strides; } | ||||
|   Index userInRowStride() const { return m_in_row_strides; } | ||||
|   Index userInColStride() const { return m_in_col_strides; } | ||||
|   Index planeInflateStride() const { return m_plane_inflate_strides; } | ||||
|   Index rowInflateStride() const { return m_row_inflate_strides; } | ||||
|   Index colInflateStride() const { return m_col_inflate_strides; } | ||||
|  | ||||
|  protected: | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const | ||||
|   { | ||||
|     EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; | ||||
|     for (int i = 0; i < PacketSize; ++i) { | ||||
|       values[i] = coeff(index+i); | ||||
|     } | ||||
|     PacketReturnType rslt = internal::pload<PacketReturnType>(values); | ||||
|     return rslt; | ||||
|   } | ||||
|  | ||||
|   Dimensions m_dimensions; | ||||
|  | ||||
|   // Parameters passed to the constructor. | ||||
|   Index m_plane_strides; | ||||
|   Index m_row_strides; | ||||
|   Index m_col_strides; | ||||
|  | ||||
|   Index m_outputPlanes; | ||||
|   Index m_outputRows; | ||||
|   Index m_outputCols; | ||||
|  | ||||
|   Index m_planePaddingTop; | ||||
|   Index m_rowPaddingTop; | ||||
|   Index m_colPaddingLeft; | ||||
|  | ||||
|   Index m_in_plane_strides; | ||||
|   Index m_in_row_strides; | ||||
|   Index m_in_col_strides; | ||||
|  | ||||
|   Index m_plane_inflate_strides; | ||||
|   Index m_row_inflate_strides; | ||||
|   Index m_col_inflate_strides; | ||||
|  | ||||
|   // Cached input size. | ||||
|   Index m_inputDepth; | ||||
|   Index m_inputPlanes; | ||||
|   Index m_inputRows; | ||||
|   Index m_inputCols; | ||||
|  | ||||
|   // Other cached variables. | ||||
|   Index m_outputPlanesRows; | ||||
|  | ||||
|   // Effective input/patch post-inflation size. | ||||
|   Index m_input_planes_eff; | ||||
|   Index m_input_rows_eff; | ||||
|   Index m_input_cols_eff; | ||||
|   Index m_patch_planes_eff; | ||||
|   Index m_patch_rows_eff; | ||||
|   Index m_patch_cols_eff; | ||||
|  | ||||
|   // Strides for the output tensor. | ||||
|   Index m_otherStride; | ||||
|   Index m_patchStride; | ||||
|   Index m_rowStride; | ||||
|   Index m_colStride; | ||||
|  | ||||
|   // Strides for the input tensor. | ||||
|   Index m_planeInputStride; | ||||
|   Index m_rowInputStride; | ||||
|   Index m_colInputStride; | ||||
|   Index m_otherInputStride; | ||||
|  | ||||
|   internal::TensorIntDivisor<Index> m_fastOtherStride; | ||||
|   internal::TensorIntDivisor<Index> m_fastPatchStride; | ||||
|   internal::TensorIntDivisor<Index> m_fastColStride; | ||||
|   internal::TensorIntDivisor<Index> m_fastRowStride; | ||||
|   internal::TensorIntDivisor<Index> m_fastInputPlaneStride; | ||||
|   internal::TensorIntDivisor<Index> m_fastInputRowStride; | ||||
|   internal::TensorIntDivisor<Index> m_fastInputColStride; | ||||
|   internal::TensorIntDivisor<Index> m_fastInputColsEff; | ||||
|   internal::TensorIntDivisor<Index> m_fastOutputPlanesRows; | ||||
|   internal::TensorIntDivisor<Index> m_fastOutputPlanes; | ||||
|   internal::TensorIntDivisor<Index> m_fastOutputDepth; | ||||
|  | ||||
|   Scalar m_paddingValue; | ||||
|  | ||||
|   TensorEvaluator<ArgType, Device> m_impl; | ||||
| }; | ||||
|  | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H | ||||
293  external/include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h  vendored  Normal file
							| @@ -0,0 +1,293 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2013 Christian Seiler <christian@iwakd.de> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSORSYMMETRY_DYNAMICSYMMETRY_H | ||||
| #define EIGEN_CXX11_TENSORSYMMETRY_DYNAMICSYMMETRY_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| class DynamicSGroup | ||||
| { | ||||
|   public: | ||||
|     inline explicit DynamicSGroup() : m_numIndices(1), m_elements(), m_generators(), m_globalFlags(0) { m_elements.push_back(ge(Generator(0, 0, 0))); } | ||||
|     inline DynamicSGroup(const DynamicSGroup& o) : m_numIndices(o.m_numIndices), m_elements(o.m_elements), m_generators(o.m_generators), m_globalFlags(o.m_globalFlags) { } | ||||
|     inline DynamicSGroup(DynamicSGroup&& o) : m_numIndices(o.m_numIndices), m_elements(), m_generators(o.m_generators), m_globalFlags(o.m_globalFlags) { std::swap(m_elements, o.m_elements); } | ||||
|     inline DynamicSGroup& operator=(const DynamicSGroup& o) { m_numIndices = o.m_numIndices; m_elements = o.m_elements; m_generators = o.m_generators; m_globalFlags = o.m_globalFlags; return *this; } | ||||
|     inline DynamicSGroup& operator=(DynamicSGroup&& o) { m_numIndices = o.m_numIndices; std::swap(m_elements, o.m_elements); m_generators = o.m_generators; m_globalFlags = o.m_globalFlags; return *this; } | ||||
|  | ||||
|     void add(int one, int two, int flags = 0); | ||||
|  | ||||
|     template<typename Gen_> | ||||
|     inline void add(Gen_) { add(Gen_::One, Gen_::Two, Gen_::Flags); } | ||||
|     inline void addSymmetry(int one, int two) { add(one, two, 0); } | ||||
|     inline void addAntiSymmetry(int one, int two) { add(one, two, NegationFlag); } | ||||
|     inline void addHermiticity(int one, int two) { add(one, two, ConjugationFlag); } | ||||
|     inline void addAntiHermiticity(int one, int two) { add(one, two, NegationFlag | ConjugationFlag); } | ||||
|  | ||||
|     template<typename Op, typename RV, typename Index, std::size_t N, typename... Args> | ||||
|     inline RV apply(const std::array<Index, N>& idx, RV initial, Args&&... args) const | ||||
|     { | ||||
|       eigen_assert(N >= m_numIndices && "Can only apply a symmetry group to objects that have at least the required number of indices."); | ||||
|       for (std::size_t i = 0; i < size(); i++) | ||||
|         initial = Op::run(h_permute(i, idx, typename internal::gen_numeric_list<int, N>::type()), m_elements[i].flags, initial, std::forward<Args>(args)...); | ||||
|       return initial; | ||||
|     } | ||||
|  | ||||
|     template<typename Op, typename RV, typename Index, typename... Args> | ||||
|     inline RV apply(const std::vector<Index>& idx, RV initial, Args&&... args) const | ||||
|     { | ||||
|       eigen_assert(idx.size() >= m_numIndices && "Can only apply a symmetry group to objects that have at least the required number of indices."); | ||||
|       for (std::size_t i = 0; i < size(); i++) | ||||
|         initial = Op::run(h_permute(i, idx), m_elements[i].flags, initial, std::forward<Args>(args)...); | ||||
|       return initial; | ||||
|     } | ||||
|  | ||||
|     inline int globalFlags() const { return m_globalFlags; } | ||||
|     inline std::size_t size() const { return m_elements.size(); } | ||||
|  | ||||
|     template<typename Tensor_, typename... IndexTypes> | ||||
|     inline internal::tensor_symmetry_value_setter<Tensor_, DynamicSGroup> operator()(Tensor_& tensor, typename Tensor_::Index firstIndex, IndexTypes... otherIndices) const | ||||
|     { | ||||
|       static_assert(sizeof...(otherIndices) + 1 == Tensor_::NumIndices, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor."); | ||||
|       return operator()(tensor, std::array<typename Tensor_::Index, Tensor_::NumIndices>{{firstIndex, otherIndices...}}); | ||||
|     } | ||||
|  | ||||
|     template<typename Tensor_> | ||||
|     inline internal::tensor_symmetry_value_setter<Tensor_, DynamicSGroup> operator()(Tensor_& tensor, std::array<typename Tensor_::Index, Tensor_::NumIndices> const& indices) const | ||||
|     { | ||||
|       return internal::tensor_symmetry_value_setter<Tensor_, DynamicSGroup>(tensor, *this, indices); | ||||
|     } | ||||
|   private: | ||||
|     struct GroupElement { | ||||
|       std::vector<int> representation; | ||||
|       int flags; | ||||
|       bool isId() const | ||||
|       { | ||||
|         for (std::size_t i = 0; i < representation.size(); i++) | ||||
|           if (i != (size_t)representation[i]) | ||||
|             return false; | ||||
|         return true; | ||||
|       } | ||||
|     }; | ||||
|     struct Generator { | ||||
|       int one; | ||||
|       int two; | ||||
|       int flags; | ||||
|       constexpr inline Generator(int one_, int two_, int flags_) : one(one_), two(two_), flags(flags_) {} | ||||
|     }; | ||||
|  | ||||
|     std::size_t m_numIndices; | ||||
|     std::vector<GroupElement> m_elements; | ||||
|     std::vector<Generator> m_generators; | ||||
|     int m_globalFlags; | ||||
|  | ||||
|     template<typename Index, std::size_t N, int... n> | ||||
|     inline std::array<Index, N> h_permute(std::size_t which, const std::array<Index, N>& idx, internal::numeric_list<int, n...>) const | ||||
|     { | ||||
|       return std::array<Index, N>{{ idx[n >= m_numIndices ? n : m_elements[which].representation[n]]... }}; | ||||
|     } | ||||
|  | ||||
|     template<typename Index> | ||||
|     inline std::vector<Index> h_permute(std::size_t which, std::vector<Index> idx) const | ||||
|     { | ||||
|       std::vector<Index> result; | ||||
|       result.reserve(idx.size()); | ||||
|       for (auto k : m_elements[which].representation) | ||||
|         result.push_back(idx[k]); | ||||
|       for (std::size_t i = m_numIndices; i < idx.size(); i++) | ||||
|         result.push_back(idx[i]); | ||||
|       return result; | ||||
|     } | ||||
|  | ||||
|     inline GroupElement ge(Generator const& g) const | ||||
|     { | ||||
|       GroupElement result; | ||||
|       result.representation.reserve(m_numIndices); | ||||
|       result.flags = g.flags; | ||||
|       for (std::size_t k = 0; k < m_numIndices; k++) { | ||||
|         if (k == (std::size_t)g.one) | ||||
|           result.representation.push_back(g.two); | ||||
|         else if (k == (std::size_t)g.two) | ||||
|           result.representation.push_back(g.one); | ||||
|         else | ||||
|           result.representation.push_back(int(k)); | ||||
|       } | ||||
|       return result; | ||||
|     } | ||||
|  | ||||
|     GroupElement mul(GroupElement, GroupElement) const; | ||||
|     inline GroupElement mul(Generator g1, GroupElement g2) const | ||||
|     { | ||||
|       return mul(ge(g1), g2); | ||||
|     } | ||||
|  | ||||
|     inline GroupElement mul(GroupElement g1, Generator g2) const | ||||
|     { | ||||
|       return mul(g1, ge(g2)); | ||||
|     } | ||||
|  | ||||
|     inline GroupElement mul(Generator g1, Generator g2) const | ||||
|     { | ||||
|       return mul(ge(g1), ge(g2)); | ||||
|     } | ||||
|  | ||||
|     inline int findElement(GroupElement e) const | ||||
|     { | ||||
|       for (auto ee : m_elements) { | ||||
|         if (ee.representation == e.representation) | ||||
|           return ee.flags ^ e.flags; | ||||
|       } | ||||
|       return -1; | ||||
|     } | ||||
|  | ||||
|     void updateGlobalFlags(int flagDiffOfSameGenerator); | ||||
| }; | ||||
|  | ||||
| // dynamic symmetry group that auto-adds the template parameters in the constructor | ||||
| template<typename... Gen> | ||||
| class DynamicSGroupFromTemplateArgs : public DynamicSGroup | ||||
| { | ||||
|   public: | ||||
|     inline DynamicSGroupFromTemplateArgs() : DynamicSGroup() | ||||
|     { | ||||
|       add_all(internal::type_list<Gen...>()); | ||||
|     } | ||||
|     inline DynamicSGroupFromTemplateArgs(DynamicSGroupFromTemplateArgs const& other) : DynamicSGroup(other) { } | ||||
|     inline DynamicSGroupFromTemplateArgs(DynamicSGroupFromTemplateArgs&& other) : DynamicSGroup(std::move(other)) { } | ||||
|     inline DynamicSGroupFromTemplateArgs<Gen...>& operator=(const DynamicSGroupFromTemplateArgs<Gen...>& o) { DynamicSGroup::operator=(o); return *this; } | ||||
|     inline DynamicSGroupFromTemplateArgs<Gen...>& operator=(DynamicSGroupFromTemplateArgs<Gen...>&& o) { DynamicSGroup::operator=(std::move(o)); return *this; } | ||||
|    | ||||
|   private: | ||||
|     template<typename Gen1, typename... GenNext> | ||||
|     inline void add_all(internal::type_list<Gen1, GenNext...>) | ||||
|     { | ||||
|       add(Gen1()); | ||||
|       add_all(internal::type_list<GenNext...>()); | ||||
|     } | ||||
|  | ||||
|     inline void add_all(internal::type_list<>) | ||||
|     { | ||||
|     } | ||||
| }; | ||||
|  | ||||
| inline DynamicSGroup::GroupElement DynamicSGroup::mul(GroupElement g1, GroupElement g2) const | ||||
| { | ||||
|   eigen_internal_assert(g1.representation.size() == m_numIndices); | ||||
|   eigen_internal_assert(g2.representation.size() == m_numIndices); | ||||
|  | ||||
|   GroupElement result; | ||||
|   result.representation.reserve(m_numIndices); | ||||
|   for (std::size_t i = 0; i < m_numIndices; i++) { | ||||
|     int v = g2.representation[g1.representation[i]]; | ||||
|     eigen_assert(v >= 0); | ||||
|     result.representation.push_back(v); | ||||
|   } | ||||
|   result.flags = g1.flags ^ g2.flags; | ||||
|   return result; | ||||
| } | ||||
|  | ||||
| inline void DynamicSGroup::add(int one, int two, int flags) | ||||
| { | ||||
|   eigen_assert(one >= 0); | ||||
|   eigen_assert(two >= 0); | ||||
|   eigen_assert(one != two); | ||||
|  | ||||
|   if ((std::size_t)one >= m_numIndices || (std::size_t)two >= m_numIndices) { | ||||
|     std::size_t newNumIndices = ((one > two) ? one : two) + 1; // extend to one past the larger of the two indices | ||||
|     for (auto& gelem : m_elements) { | ||||
|       gelem.representation.reserve(newNumIndices); | ||||
|       for (std::size_t i = m_numIndices; i < newNumIndices; i++) | ||||
|         gelem.representation.push_back(i); | ||||
|     } | ||||
|     m_numIndices = newNumIndices; | ||||
|   } | ||||
|  | ||||
|   Generator g{one, two, flags}; | ||||
|   GroupElement e = ge(g); | ||||
|  | ||||
|   /* special case for first generator */ | ||||
|   if (m_elements.size() == 1) { | ||||
|     while (!e.isId()) { | ||||
|       m_elements.push_back(e); | ||||
|       e = mul(e, g); | ||||
|     } | ||||
|  | ||||
|     if (e.flags > 0) | ||||
|       updateGlobalFlags(e.flags); | ||||
|  | ||||
|     // only add in case we didn't have identity | ||||
|     if (m_elements.size() > 1) | ||||
|       m_generators.push_back(g); | ||||
|     return; | ||||
|   } | ||||
|  | ||||
|   int p = findElement(e); | ||||
|   if (p >= 0) { | ||||
|     updateGlobalFlags(p); | ||||
|     return; | ||||
|   } | ||||
|  | ||||
|   std::size_t coset_order = m_elements.size(); | ||||
|   m_elements.push_back(e); | ||||
|   for (std::size_t i = 1; i < coset_order; i++) | ||||
|     m_elements.push_back(mul(m_elements[i], e)); | ||||
|   m_generators.push_back(g); | ||||
|  | ||||
|   std::size_t coset_rep = coset_order; | ||||
|   do { | ||||
|     for (auto g : m_generators) { | ||||
|       e = mul(m_elements[coset_rep], g); | ||||
|       p = findElement(e); | ||||
|       if (p < 0) { | ||||
|         // element not yet in group | ||||
|         m_elements.push_back(e); | ||||
|         for (std::size_t i = 1; i < coset_order; i++) | ||||
|           m_elements.push_back(mul(m_elements[i], e)); | ||||
|       } else if (p > 0) { | ||||
|         updateGlobalFlags(p); | ||||
|       } | ||||
|     } | ||||
|     coset_rep += coset_order; | ||||
|   } while (coset_rep < m_elements.size()); | ||||
| } | ||||
|  | ||||
| inline void DynamicSGroup::updateGlobalFlags(int flagDiffOfSameGenerator) | ||||
| { | ||||
|     switch (flagDiffOfSameGenerator) { | ||||
|       case 0: | ||||
|       default: | ||||
|         // nothing happened | ||||
|         break; | ||||
|       case NegationFlag: | ||||
|         // every element is its own negative => whole tensor is zero | ||||
|         m_globalFlags |= GlobalZeroFlag; | ||||
|         break; | ||||
|       case ConjugationFlag: | ||||
|         // every element is its own conjugate => whole tensor is real | ||||
|         m_globalFlags |= GlobalRealFlag; | ||||
|         break; | ||||
|       case (NegationFlag | ConjugationFlag): | ||||
|         // every element is its own negative conjugate => whole tensor is imaginary | ||||
|         m_globalFlags |= GlobalImagFlag; | ||||
|         break; | ||||
|       /* NOTE: | ||||
|        *   since GlobalZeroFlag == GlobalRealFlag | GlobalImagFlag, if one generator | ||||
|        *   causes the tensor to be real and the next one to be imaginary, this will | ||||
|        *   trivially give the correct result | ||||
|        */ | ||||
|     } | ||||
| } | ||||
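|  | ||||
| /* Editor's note (illustrative sketch, not part of the original header): the flag | ||||
|  * mechanism above is triggered when the same index pair is added with two | ||||
|  * different flag sets, e.g. | ||||
|  * | ||||
|  * \code | ||||
|  * Eigen::DynamicSGroup sym; | ||||
|  * sym.addSymmetry(0, 1);     // t(i,j,...) ==       t(j,i,...) | ||||
|  * sym.addHermiticity(0, 1);  // t(i,j,...) == conj(t(j,i,...)) | ||||
|  * // both generators map to the same permutation with a flag difference of | ||||
|  * // ConjugationFlag, so updateGlobalFlags() records GlobalRealFlag: the | ||||
|  * // tensor is constrained to be real. | ||||
|  * \endcode | ||||
|  */ | ||||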
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSORSYMMETRY_DYNAMICSYMMETRY_H | ||||
|  | ||||
| /* | ||||
|  * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; | ||||
|  */ | ||||
							
								
								
									
236 lines: external/include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h (vendored, new file)
							| @@ -0,0 +1,236 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2013 Christian Seiler <christian@iwakd.de> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSORSYMMETRY_STATICSYMMETRY_H | ||||
| #define EIGEN_CXX11_TENSORSYMMETRY_STATICSYMMETRY_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| namespace internal { | ||||
|  | ||||
| template<typename list> struct tensor_static_symgroup_permutate; | ||||
|  | ||||
| template<int... nn> | ||||
| struct tensor_static_symgroup_permutate<numeric_list<int, nn...>> | ||||
| { | ||||
|   constexpr static std::size_t N = sizeof...(nn); | ||||
|  | ||||
|   template<typename T> | ||||
|   constexpr static inline std::array<T, N> run(const std::array<T, N>& indices) | ||||
|   { | ||||
|     return {{indices[nn]...}}; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<typename indices_, int flags_> | ||||
| struct tensor_static_symgroup_element | ||||
| { | ||||
|   typedef indices_ indices; | ||||
|   constexpr static int flags = flags_; | ||||
| }; | ||||
|  | ||||
| template<typename Gen, int N> | ||||
| struct tensor_static_symgroup_element_ctor | ||||
| { | ||||
|   typedef tensor_static_symgroup_element< | ||||
|     typename gen_numeric_list_swapped_pair<int, N, Gen::One, Gen::Two>::type, | ||||
|     Gen::Flags | ||||
|   > type; | ||||
| }; | ||||
|  | ||||
| template<int N> | ||||
| struct tensor_static_symgroup_identity_ctor | ||||
| { | ||||
|   typedef tensor_static_symgroup_element< | ||||
|     typename gen_numeric_list<int, N>::type, | ||||
|     0 | ||||
|   > type; | ||||
| }; | ||||
|  | ||||
| template<typename iib> | ||||
| struct tensor_static_symgroup_multiply_helper | ||||
| { | ||||
|   template<int... iia> | ||||
|   constexpr static inline numeric_list<int, get<iia, iib>::value...> helper(numeric_list<int, iia...>) { | ||||
|     return numeric_list<int, get<iia, iib>::value...>(); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<typename A, typename B> | ||||
| struct tensor_static_symgroup_multiply | ||||
| { | ||||
|   private: | ||||
|     typedef typename A::indices iia; | ||||
|     typedef typename B::indices iib; | ||||
|     constexpr static int ffa = A::flags; | ||||
|     constexpr static int ffb = B::flags; | ||||
|    | ||||
|   public: | ||||
|     static_assert(iia::count == iib::count, "Cannot multiply symmetry elements with different number of indices."); | ||||
|  | ||||
|     typedef tensor_static_symgroup_element< | ||||
|       decltype(tensor_static_symgroup_multiply_helper<iib>::helper(iia())), | ||||
|       ffa ^ ffb | ||||
|     > type; | ||||
| }; | ||||
|  | ||||
| template<typename A, typename B> | ||||
| struct tensor_static_symgroup_equality | ||||
| { | ||||
|     typedef typename A::indices iia; | ||||
|     typedef typename B::indices iib; | ||||
|     constexpr static int ffa = A::flags; | ||||
|     constexpr static int ffb = B::flags; | ||||
|     static_assert(iia::count == iib::count, "Cannot compare symmetry elements with different number of indices."); | ||||
|  | ||||
|     constexpr static bool value = is_same<iia, iib>::value; | ||||
|  | ||||
|   private: | ||||
|     /* this should be zero if they are identical, or else the tensor | ||||
|      * will be forced to be pure real, pure imaginary or even pure zero | ||||
|      */ | ||||
|     constexpr static int flags_cmp_ = ffa ^ ffb; | ||||
|  | ||||
|     /* either they are not equal, then we don't care whether the flags | ||||
|      * match, or they are equal, and then we have to check | ||||
|      */ | ||||
|     constexpr static bool is_zero      = value && flags_cmp_ == NegationFlag; | ||||
|     constexpr static bool is_real      = value && flags_cmp_ == ConjugationFlag; | ||||
|     constexpr static bool is_imag      = value && flags_cmp_ == (NegationFlag | ConjugationFlag); | ||||
|  | ||||
|   public: | ||||
|     constexpr static int global_flags =  | ||||
|       (is_real ? GlobalRealFlag : 0) | | ||||
|       (is_imag ? GlobalImagFlag : 0) | | ||||
|       (is_zero ? GlobalZeroFlag : 0); | ||||
| }; | ||||
|  | ||||
| template<std::size_t NumIndices, typename... Gen> | ||||
| struct tensor_static_symgroup | ||||
| { | ||||
|   typedef StaticSGroup<Gen...> type; | ||||
|   constexpr static std::size_t size = type::static_size; | ||||
| }; | ||||
|  | ||||
| template<typename Index, std::size_t N, int... ii, int... jj> | ||||
| constexpr static inline std::array<Index, N> tensor_static_symgroup_index_permute(std::array<Index, N> idx, internal::numeric_list<int, ii...>, internal::numeric_list<int, jj...>) | ||||
| { | ||||
|   return {{ idx[ii]..., idx[jj]... }}; | ||||
| } | ||||
|  | ||||
| template<typename Index, int... ii> | ||||
| static inline std::vector<Index> tensor_static_symgroup_index_permute(std::vector<Index> idx, internal::numeric_list<int, ii...>) | ||||
| { | ||||
|   std::vector<Index> result{{ idx[ii]... }}; | ||||
|   std::size_t target_size = idx.size(); | ||||
|   for (std::size_t i = result.size(); i < target_size; i++) | ||||
|     result.push_back(idx[i]); | ||||
|   return result; | ||||
| } | ||||
|  | ||||
| template<typename T> struct tensor_static_symgroup_do_apply; | ||||
|  | ||||
| template<typename first, typename... next> | ||||
| struct tensor_static_symgroup_do_apply<internal::type_list<first, next...>> | ||||
| { | ||||
|   template<typename Op, typename RV, std::size_t SGNumIndices, typename Index, std::size_t NumIndices, typename... Args> | ||||
|   static inline RV run(const std::array<Index, NumIndices>& idx, RV initial, Args&&... args) | ||||
|   { | ||||
|     static_assert(NumIndices >= SGNumIndices, "Can only apply symmetry group to objects that have at least the required amount of indices."); | ||||
|     typedef typename internal::gen_numeric_list<int, NumIndices - SGNumIndices, SGNumIndices>::type remaining_indices; | ||||
|     initial = Op::run(tensor_static_symgroup_index_permute(idx, typename first::indices(), remaining_indices()), first::flags, initial, std::forward<Args>(args)...); | ||||
|     return tensor_static_symgroup_do_apply<internal::type_list<next...>>::template run<Op, RV, SGNumIndices>(idx, initial, args...); | ||||
|   } | ||||
|  | ||||
|   template<typename Op, typename RV, std::size_t SGNumIndices, typename Index, typename... Args> | ||||
|   static inline RV run(const std::vector<Index>& idx, RV initial, Args&&... args) | ||||
|   { | ||||
|     eigen_assert(idx.size() >= SGNumIndices && "Can only apply symmetry group to objects that have at least the required amount of indices."); | ||||
|     initial = Op::run(tensor_static_symgroup_index_permute(idx, typename first::indices()), first::flags, initial, std::forward<Args>(args)...); | ||||
|     return tensor_static_symgroup_do_apply<internal::type_list<next...>>::template run<Op, RV, SGNumIndices>(idx, initial, args...); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<EIGEN_TPL_PP_SPEC_HACK_DEF(typename, empty)> | ||||
| struct tensor_static_symgroup_do_apply<internal::type_list<EIGEN_TPL_PP_SPEC_HACK_USE(empty)>> | ||||
| { | ||||
|   template<typename Op, typename RV, std::size_t SGNumIndices, typename Index, std::size_t NumIndices, typename... Args> | ||||
|   static inline RV run(const std::array<Index, NumIndices>&, RV initial, Args&&...) | ||||
|   { | ||||
|     // do nothing | ||||
|     return initial; | ||||
|   } | ||||
|  | ||||
|   template<typename Op, typename RV, std::size_t SGNumIndices, typename Index, typename... Args> | ||||
|   static inline RV run(const std::vector<Index>&, RV initial, Args&&...) | ||||
|   { | ||||
|     // do nothing | ||||
|     return initial; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| } // end namespace internal | ||||
|  | ||||
| template<typename... Gen> | ||||
| class StaticSGroup | ||||
| { | ||||
|     constexpr static std::size_t NumIndices = internal::tensor_symmetry_num_indices<Gen...>::value; | ||||
|     typedef internal::group_theory::enumerate_group_elements< | ||||
|       internal::tensor_static_symgroup_multiply, | ||||
|       internal::tensor_static_symgroup_equality, | ||||
|       typename internal::tensor_static_symgroup_identity_ctor<NumIndices>::type, | ||||
|       internal::type_list<typename internal::tensor_static_symgroup_element_ctor<Gen, NumIndices>::type...> | ||||
|     > group_elements; | ||||
|     typedef typename group_elements::type ge; | ||||
|   public: | ||||
|     constexpr inline StaticSGroup() {} | ||||
|     constexpr inline StaticSGroup(const StaticSGroup<Gen...>&) {} | ||||
|     constexpr inline StaticSGroup(StaticSGroup<Gen...>&&) {} | ||||
|  | ||||
|     template<typename Op, typename RV, typename Index, std::size_t N, typename... Args> | ||||
|     static inline RV apply(const std::array<Index, N>& idx, RV initial, Args&&... args) | ||||
|     { | ||||
|       return internal::tensor_static_symgroup_do_apply<ge>::template run<Op, RV, NumIndices>(idx, initial, args...); | ||||
|     } | ||||
|  | ||||
|     template<typename Op, typename RV, typename Index, typename... Args> | ||||
|     static inline RV apply(const std::vector<Index>& idx, RV initial, Args&&... args) | ||||
|     { | ||||
|       eigen_assert(idx.size() == NumIndices); | ||||
|       return internal::tensor_static_symgroup_do_apply<ge>::template run<Op, RV, NumIndices>(idx, initial, args...); | ||||
|     } | ||||
|  | ||||
|     constexpr static std::size_t static_size = ge::count; | ||||
|  | ||||
|     constexpr static inline std::size_t size() { | ||||
|       return ge::count; | ||||
|     } | ||||
|     constexpr static inline int globalFlags() { return group_elements::global_flags; } | ||||
|  | ||||
|     template<typename Tensor_, typename... IndexTypes> | ||||
|     inline internal::tensor_symmetry_value_setter<Tensor_, StaticSGroup<Gen...>> operator()(Tensor_& tensor, typename Tensor_::Index firstIndex, IndexTypes... otherIndices) const | ||||
|     { | ||||
|       static_assert(sizeof...(otherIndices) + 1 == Tensor_::NumIndices, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor."); | ||||
|       return operator()(tensor, std::array<typename Tensor_::Index, Tensor_::NumIndices>{{firstIndex, otherIndices...}}); | ||||
|     } | ||||
|  | ||||
|     template<typename Tensor_> | ||||
|     inline internal::tensor_symmetry_value_setter<Tensor_, StaticSGroup<Gen...>> operator()(Tensor_& tensor, std::array<typename Tensor_::Index, Tensor_::NumIndices> const& indices) const | ||||
|     { | ||||
|       return internal::tensor_symmetry_value_setter<Tensor_, StaticSGroup<Gen...>>(tensor, *this, indices); | ||||
|     } | ||||
| }; | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSORSYMMETRY_STATICSYMMETRY_H | ||||
|  | ||||
| /* | ||||
|  * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; | ||||
|  */ | ||||
							
								
								
									
338 lines: external/include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h (vendored, new file)
							| @@ -0,0 +1,338 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2013 Christian Seiler <christian@iwakd.de> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSORSYMMETRY_SYMMETRY_H | ||||
| #define EIGEN_CXX11_TENSORSYMMETRY_SYMMETRY_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| enum { | ||||
|   NegationFlag           = 0x01, | ||||
|   ConjugationFlag        = 0x02 | ||||
| }; | ||||
|  | ||||
| enum { | ||||
|   GlobalRealFlag         = 0x01, | ||||
|   GlobalImagFlag         = 0x02, | ||||
|   GlobalZeroFlag         = 0x03 | ||||
| }; | ||||
|  | ||||
| namespace internal { | ||||
|  | ||||
| template<std::size_t NumIndices, typename... Sym>                   struct tensor_symmetry_pre_analysis; | ||||
| template<std::size_t NumIndices, typename... Sym>                   struct tensor_static_symgroup; | ||||
| template<bool instantiate, std::size_t NumIndices, typename... Sym> struct tensor_static_symgroup_if; | ||||
| template<typename Tensor_> struct tensor_symmetry_calculate_flags; | ||||
| template<typename Tensor_> struct tensor_symmetry_assign_value; | ||||
| template<typename... Sym> struct tensor_symmetry_num_indices; | ||||
|  | ||||
| } // end namespace internal | ||||
|  | ||||
| template<int One_, int Two_> | ||||
| struct Symmetry | ||||
| { | ||||
|   static_assert(One_ != Two_, "Symmetries must cover distinct indices."); | ||||
|   constexpr static int One = One_; | ||||
|   constexpr static int Two = Two_; | ||||
|   constexpr static int Flags = 0; | ||||
| }; | ||||
|  | ||||
| template<int One_, int Two_> | ||||
| struct AntiSymmetry | ||||
| { | ||||
|   static_assert(One_ != Two_, "Symmetries must cover distinct indices."); | ||||
|   constexpr static int One = One_; | ||||
|   constexpr static int Two = Two_; | ||||
|   constexpr static int Flags = NegationFlag; | ||||
| }; | ||||
|  | ||||
| template<int One_, int Two_> | ||||
| struct Hermiticity | ||||
| { | ||||
|   static_assert(One_ != Two_, "Symmetries must cover distinct indices."); | ||||
|   constexpr static int One = One_; | ||||
|   constexpr static int Two = Two_; | ||||
|   constexpr static int Flags = ConjugationFlag; | ||||
| }; | ||||
|  | ||||
| template<int One_, int Two_> | ||||
| struct AntiHermiticity | ||||
| { | ||||
|   static_assert(One_ != Two_, "Symmetries must cover distinct indices."); | ||||
|   constexpr static int One = One_; | ||||
|   constexpr static int Two = Two_; | ||||
|   constexpr static int Flags = ConjugationFlag | NegationFlag; | ||||
| }; | ||||
|  | ||||
| /** \class DynamicSGroup | ||||
|   * \ingroup TensorSymmetry_Module | ||||
|   * | ||||
|   * \brief Dynamic symmetry group | ||||
|   * | ||||
|   * The %DynamicSGroup class represents a symmetry group that need not be known at | ||||
|   * compile time. It is useful for supporting arbitrary run-time definable | ||||
|   * symmetries for tensors, but it is also instantiated if a symmetry group defined | ||||
|   * at compile time would either be too large for the compiler to generate within | ||||
|   * reasonable time and memory (template-based compile-time enumeration is very | ||||
|   * inefficient), or could be generated but is so large that unrolling the | ||||
|   * coefficient-setting loop would no longer make sense. | ||||
|   */ | ||||
| class DynamicSGroup; | ||||
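|  | ||||
| /* Editor's note: a minimal usage sketch (illustrative only, assuming the Tensor | ||||
|  * module and the TensorSymmetry umbrella header are on the include path): | ||||
|  * | ||||
|  * \code | ||||
|  * #include <unsupported/Eigen/CXX11/Tensor> | ||||
|  * #include <unsupported/Eigen/CXX11/TensorSymmetry> | ||||
|  * | ||||
|  * Eigen::Tensor<double, 3> t(4, 4, 4); | ||||
|  * t.setZero(); | ||||
|  * Eigen::DynamicSGroup sym;     // symmetries chosen at run time | ||||
|  * sym.addSymmetry(0, 1);        // t(i,j,k) ==  t(j,i,k) | ||||
|  * sym.addAntiSymmetry(1, 2);    // t(i,j,k) == -t(i,k,j) | ||||
|  * sym(t, 0, 1, 2) = 42.0;       // writes every coefficient related by the group | ||||
|  * \endcode | ||||
|  */ | ||||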
|  | ||||
| /** \internal | ||||
|   * | ||||
|   * \class DynamicSGroupFromTemplateArgs | ||||
|   * \ingroup TensorSymmetry_Module | ||||
|   * | ||||
|   * \brief Dynamic symmetry group, initialized from template arguments | ||||
|   * | ||||
|   * This class is a child class of DynamicSGroup. It uses the template arguments | ||||
|   * specified to initialize itself. | ||||
|   */ | ||||
| template<typename... Gen> | ||||
| class DynamicSGroupFromTemplateArgs; | ||||
|  | ||||
| /** \class StaticSGroup | ||||
|   * \ingroup TensorSymmetry_Module | ||||
|   * | ||||
|   * \brief Static symmetry group | ||||
|   * | ||||
|   * This class represents a symmetry group that is known and resolved completely | ||||
|   * at compile time. Ideally, no run-time penalty is incurred compared to the | ||||
|   * manual unrolling of the symmetry. | ||||
|   * | ||||
|   * <b><i>CAUTION:</i></b> | ||||
|   * | ||||
|   * Do not use this class directly for large symmetry groups. The compiler | ||||
|   * may run into a limit, or segfault or in the very least will take a very, | ||||
|   * very, very long time to compile the code. Use the SGroup class instead | ||||
|   * if you want a static group. That class contains logic that will | ||||
|   * automatically select the DynamicSGroup class instead if the symmetry | ||||
|   * group becomes too large. (In that case, unrolling may not even be | ||||
|   * beneficial.) | ||||
|   */ | ||||
| template<typename... Gen> | ||||
| class StaticSGroup; | ||||
|  | ||||
| /** \class SGroup | ||||
|   * \ingroup TensorSymmetry_Module | ||||
|   * | ||||
|   * \brief Symmetry group, initialized from template arguments | ||||
|   * | ||||
|   * This class represents a symmetry group whose generators are already | ||||
|   * known at compile time. It may or may not be resolved at compile time, | ||||
|   * depending on the estimated size of the group. | ||||
|   * | ||||
|   * \sa StaticSGroup | ||||
|   * \sa DynamicSGroup | ||||
|   */ | ||||
| template<typename... Gen> | ||||
| class SGroup : public internal::tensor_symmetry_pre_analysis<internal::tensor_symmetry_num_indices<Gen...>::value, Gen...>::root_type | ||||
| { | ||||
|   public: | ||||
|     constexpr static std::size_t NumIndices = internal::tensor_symmetry_num_indices<Gen...>::value; | ||||
|     typedef typename internal::tensor_symmetry_pre_analysis<NumIndices, Gen...>::root_type Base; | ||||
|  | ||||
|     // make standard constructors + assignment operators public | ||||
|     inline SGroup() : Base() { } | ||||
|     inline SGroup(const SGroup<Gen...>& other) : Base(other) { } | ||||
|     inline SGroup(SGroup<Gen...>&& other) : Base(std::move(other)) { } | ||||
|     inline SGroup<Gen...>& operator=(const SGroup<Gen...>& other) { Base::operator=(other); return *this; } | ||||
|     inline SGroup<Gen...>& operator=(SGroup<Gen...>&& other) { Base::operator=(std::move(other)); return *this; } | ||||
|  | ||||
|     // all else is defined in the base class | ||||
| }; | ||||
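|  | ||||
| /* Editor's note: a compile-time counterpart to the dynamic sketch above | ||||
|  * (illustrative only; include paths as in the previous example): | ||||
|  * | ||||
|  * \code | ||||
|  * Eigen::SGroup<Eigen::AntiSymmetry<0, 1> > asym; | ||||
|  * Eigen::Tensor<double, 2> t(3, 3); | ||||
|  * t.setZero(); | ||||
|  * asym(t, 0, 1) = 5.0;   // also sets t(1,0) == -5.0 | ||||
|  * \endcode | ||||
|  */ | ||||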
|  | ||||
| namespace internal { | ||||
|  | ||||
| template<typename... Sym> struct tensor_symmetry_num_indices | ||||
| { | ||||
|   constexpr static std::size_t value = 1; | ||||
| }; | ||||
|  | ||||
| template<int One_, int Two_, typename... Sym> struct tensor_symmetry_num_indices<Symmetry<One_, Two_>, Sym...> | ||||
| { | ||||
| private: | ||||
|   constexpr static std::size_t One = static_cast<std::size_t>(One_); | ||||
|   constexpr static std::size_t Two = static_cast<std::size_t>(Two_); | ||||
|   constexpr static std::size_t Three = tensor_symmetry_num_indices<Sym...>::value; | ||||
|  | ||||
|   // don't use std::max, since it's not constexpr until C++14... | ||||
|   constexpr static std::size_t maxOneTwoPlusOne = ((One > Two) ? One : Two) + 1; | ||||
| public: | ||||
|   constexpr static std::size_t value = (maxOneTwoPlusOne > Three) ? maxOneTwoPlusOne : Three; | ||||
| }; | ||||
|  | ||||
| template<int One_, int Two_, typename... Sym> struct tensor_symmetry_num_indices<AntiSymmetry<One_, Two_>, Sym...> | ||||
|   : public tensor_symmetry_num_indices<Symmetry<One_, Two_>, Sym...> {}; | ||||
| template<int One_, int Two_, typename... Sym> struct tensor_symmetry_num_indices<Hermiticity<One_, Two_>, Sym...> | ||||
|   : public tensor_symmetry_num_indices<Symmetry<One_, Two_>, Sym...> {}; | ||||
| template<int One_, int Two_, typename... Sym> struct tensor_symmetry_num_indices<AntiHermiticity<One_, Two_>, Sym...> | ||||
|   : public tensor_symmetry_num_indices<Symmetry<One_, Two_>, Sym...> {}; | ||||
|  | ||||
| /** \internal | ||||
|   * | ||||
|   * \class tensor_symmetry_pre_analysis | ||||
|   * \ingroup TensorSymmetry_Module | ||||
|   * | ||||
|   * \brief Pre-select whether to use a static or dynamic symmetry group | ||||
|   * | ||||
|   * When a symmetry group could in principle be determined at compile time, | ||||
|   * this template implements the logic whether to actually do that or whether | ||||
|   * to rather defer that to runtime. | ||||
|   * | ||||
|   * The logic is as follows: | ||||
|   * <dl> | ||||
|   * <dt><b>No generators (trivial symmetry):</b></dt> | ||||
|   * <dd>Use a trivial static group. Ideally, this has no performance impact | ||||
|   *     compared to not using symmetry at all. In practice, this might not | ||||
|   *     be the case.</dd> | ||||
|   * <dt><b>More than 4 generators:</b></dt> | ||||
|   * <dd>Calculate the group at run time, it is likely far too large for the | ||||
|   *     compiler to be able to properly generate it in a realistic time.</dd> | ||||
|   * <dt><b>Up to and including 4 generators:</b></dt> | ||||
|   * <dd>Actually enumerate all group elements, but then check how many there | ||||
|   *     are. If there are more than 16, it is unlikely that unrolling the | ||||
|   *     loop (as is done in the static compile-time case) is sensible, so | ||||
|   *     use a dynamic group instead. If there are at most 16 elements, actually | ||||
|   *     use that static group. Note that the largest group with 4 generators | ||||
|   *     still compiles with reasonable resources.</dd> | ||||
|   * </dl> | ||||
|   * | ||||
|   * Note: Example compile-time performance with g++ 4.6 on an Intel Core i5-3470 | ||||
|   *       with 16 GiB RAM (all generators non-redundant and the subgroups don't | ||||
|   *       factorize): | ||||
|   * | ||||
|   *          # Generators          -O0 -ggdb               -O2 | ||||
|   *          ------------------------------------------------------------------- | ||||
|   *          1                 0.5 s  /   250 MiB     0.45s /   230 MiB | ||||
|   *          2                 0.5 s  /   260 MiB     0.5 s /   250 MiB | ||||
|   *          3                 0.65s  /   310 MiB     0.62s /   310 MiB | ||||
|   *          4                 2.2 s  /   860 MiB     1.7 s /   770 MiB | ||||
|   *          5               130   s  / 13000 MiB   120   s / 11000 MiB | ||||
|   * | ||||
|   * It is clear that everything is still very efficient up to 4 generators; beyond that, | ||||
|   * the memory and CPU requirements become unreasonable. Thus we only instantiate | ||||
|   * the template group theory logic if the number of generators supplied is 4 or | ||||
|   * lower, otherwise this will be forced to be done during runtime, where the | ||||
|   * algorithm is reasonably fast. | ||||
|   */ | ||||
| template<std::size_t NumIndices> | ||||
| struct tensor_symmetry_pre_analysis<NumIndices> | ||||
| { | ||||
|   typedef StaticSGroup<> root_type; | ||||
| }; | ||||
|  | ||||
| template<std::size_t NumIndices, typename Gen_, typename... Gens_> | ||||
| struct tensor_symmetry_pre_analysis<NumIndices, Gen_, Gens_...> | ||||
| { | ||||
|   constexpr static std::size_t max_static_generators = 4; | ||||
|   constexpr static std::size_t max_static_elements = 16; | ||||
|   typedef tensor_static_symgroup_if<(sizeof...(Gens_) + 1 <= max_static_generators), NumIndices, Gen_, Gens_...> helper; | ||||
|   constexpr static std::size_t possible_size = helper::size; | ||||
|  | ||||
|   typedef typename conditional< | ||||
|     possible_size == 0 || possible_size >= max_static_elements, | ||||
|     DynamicSGroupFromTemplateArgs<Gen_, Gens_...>, | ||||
|     typename helper::type | ||||
|   >::type root_type; | ||||
| }; | ||||
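|  | ||||
| /* Editor's sketch (illustrative, assuming the thresholds above; the typedef name | ||||
|  * is arbitrary): two independent pair symmetries give a 4-element group, which | ||||
|  * stays within the static limits, so SGroup resolves to the unrolled StaticSGroup | ||||
|  * base; five or more generators would instead select DynamicSGroupFromTemplateArgs. | ||||
|  * | ||||
|  * \code | ||||
|  * typedef Eigen::SGroup<Eigen::Symmetry<0, 1>, Eigen::Symmetry<2, 3> > SmallGroup; | ||||
|  * static_assert(SmallGroup::NumIndices == 4, "generators cover indices 0..3"); | ||||
|  * static_assert(SmallGroup::static_size == 4, "resolved statically, 4 elements"); | ||||
|  * \endcode | ||||
|  */ | ||||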
|  | ||||
| template<bool instantiate, std::size_t NumIndices, typename... Gens> | ||||
| struct tensor_static_symgroup_if | ||||
| { | ||||
|   constexpr static std::size_t size = 0; | ||||
|   typedef void type; | ||||
| }; | ||||
|  | ||||
| template<std::size_t NumIndices, typename... Gens> | ||||
| struct tensor_static_symgroup_if<true, NumIndices, Gens...> : tensor_static_symgroup<NumIndices, Gens...> {}; | ||||
|  | ||||
| template<typename Tensor_> | ||||
| struct tensor_symmetry_assign_value | ||||
| { | ||||
|   typedef typename Tensor_::Index Index; | ||||
|   typedef typename Tensor_::Scalar Scalar; | ||||
|   constexpr static std::size_t NumIndices = Tensor_::NumIndices; | ||||
|  | ||||
|   static inline int run(const std::array<Index, NumIndices>& transformed_indices, int transformation_flags, int dummy, Tensor_& tensor, const Scalar& value_) | ||||
|   { | ||||
|     Scalar value(value_); | ||||
|     if (transformation_flags & ConjugationFlag) | ||||
|       value = numext::conj(value); | ||||
|     if (transformation_flags & NegationFlag) | ||||
|       value = -value; | ||||
|     tensor.coeffRef(transformed_indices) = value; | ||||
|     return dummy; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<typename Tensor_> | ||||
| struct tensor_symmetry_calculate_flags | ||||
| { | ||||
|   typedef typename Tensor_::Index Index; | ||||
|   constexpr static std::size_t NumIndices = Tensor_::NumIndices; | ||||
|  | ||||
|   static inline int run(const std::array<Index, NumIndices>& transformed_indices, int transform_flags, int current_flags, const std::array<Index, NumIndices>& orig_indices) | ||||
|   { | ||||
|     if (transformed_indices == orig_indices) { | ||||
|       if ((transform_flags & (ConjugationFlag | NegationFlag)) == (ConjugationFlag | NegationFlag)) | ||||
|         return current_flags | GlobalImagFlag; // anti-hermitian diagonal | ||||
|       else if (transform_flags & ConjugationFlag) | ||||
|         return current_flags | GlobalRealFlag; // hermitian diagonal | ||||
|       else if (transform_flags & NegationFlag) | ||||
|         return current_flags | GlobalZeroFlag; // anti-symmetric diagonal | ||||
|     } | ||||
|     return current_flags; | ||||
|   } | ||||
| }; | ||||
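|  | ||||
| /* Editor's note (illustrative): the flags computed above feed the optional | ||||
|  * EIGEN_TENSOR_SYMMETRY_CHECK_VALUES check in tensor_symmetry_value_setter | ||||
|  * below; e.g. for an anti-symmetric index pair the diagonal must vanish: | ||||
|  * | ||||
|  * \code | ||||
|  * Eigen::SGroup<Eigen::AntiSymmetry<0, 1> > asym; | ||||
|  * Eigen::Tensor<double, 2> t(2, 2); | ||||
|  * // t(0,0) == -t(0,0) forces a zero diagonal, so with the check enabled any | ||||
|  * // nonzero assignment here would trigger the assertion | ||||
|  * asym(t, 0, 0) = 0.0; | ||||
|  * \endcode | ||||
|  */ | ||||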
|  | ||||
| template<typename Tensor_, typename Symmetry_, int Flags = 0> | ||||
| class tensor_symmetry_value_setter | ||||
| { | ||||
|   public: | ||||
|     typedef typename Tensor_::Index Index; | ||||
|     typedef typename Tensor_::Scalar Scalar; | ||||
|     constexpr static std::size_t NumIndices = Tensor_::NumIndices; | ||||
|  | ||||
|     inline tensor_symmetry_value_setter(Tensor_& tensor, Symmetry_ const& symmetry, std::array<Index, NumIndices> const& indices) | ||||
|       : m_tensor(tensor), m_symmetry(symmetry), m_indices(indices) { } | ||||
|  | ||||
|     inline tensor_symmetry_value_setter<Tensor_, Symmetry_, Flags>& operator=(Scalar const& value) | ||||
|     { | ||||
|       doAssign(value); | ||||
|       return *this; | ||||
|     } | ||||
|   private: | ||||
|     Tensor_& m_tensor; | ||||
|     Symmetry_ m_symmetry; | ||||
|     std::array<Index, NumIndices> m_indices; | ||||
|  | ||||
|     inline void doAssign(Scalar const& value) | ||||
|     { | ||||
|       #ifdef EIGEN_TENSOR_SYMMETRY_CHECK_VALUES | ||||
|         int value_flags = m_symmetry.template apply<internal::tensor_symmetry_calculate_flags<Tensor_>, int>(m_indices, m_symmetry.globalFlags(), m_indices); | ||||
|         if (value_flags & GlobalRealFlag) | ||||
|           eigen_assert(numext::imag(value) == 0); | ||||
|         if (value_flags & GlobalImagFlag) | ||||
|           eigen_assert(numext::real(value) == 0); | ||||
|       #endif | ||||
|       m_symmetry.template apply<internal::tensor_symmetry_assign_value<Tensor_>, int>(m_indices, 0, m_tensor, value); | ||||
|     } | ||||
| }; | ||||
|  | ||||
| } // end namespace internal | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSORSYMMETRY_SYMMETRY_H | ||||
|  | ||||
| /* | ||||
|  * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; | ||||
|  */ | ||||
							
								
								
									
669 lines: external/include/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h (vendored, new file)
							| @@ -0,0 +1,669 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2013 Christian Seiler <christian@iwakd.de> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_TENSORSYMMETRY_TEMPLATEGROUPTHEORY_H | ||||
| #define EIGEN_CXX11_TENSORSYMMETRY_TEMPLATEGROUPTHEORY_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| namespace internal { | ||||
|  | ||||
| namespace group_theory { | ||||
|  | ||||
| /** \internal | ||||
|   * \file CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h | ||||
|   * This file contains C++ templates that implement group theory algorithms. | ||||
|   * | ||||
|   * The algorithms allow for a compile-time analysis of finite groups. | ||||
|   * | ||||
|   * Currently only Dimino's algorithm is implemented, which returns a list | ||||
|   * of all elements in a group given a set of (possibly redundant) generators. | ||||
|   * (One could also do that with the so-called orbital algorithm, but that | ||||
|   * is much more expensive and usually has no advantages.) | ||||
|   */ | ||||
|  | ||||
| /********************************************************************** | ||||
|  *                "Ok kid, here is where it gets complicated." | ||||
|  *                         - Amelia Pond in the "Doctor Who" episode | ||||
|  *                           "The Big Bang" | ||||
|  * | ||||
|  * Dimino's algorithm | ||||
|  * ================== | ||||
|  * | ||||
|  * The following is Dimino's algorithm in sequential form: | ||||
|  * | ||||
|  * Input: identity element, list of generators, equality check, | ||||
|  *        multiplication operation | ||||
|  * Output: list of group elements | ||||
|  * | ||||
|  * 1. add identity element | ||||
|  * 2. remove identities from list of generators | ||||
|  * 3. add all powers of first generator that aren't the | ||||
|  *    identity element | ||||
|  * 4. go through all remaining generators: | ||||
|  *        a. if generator is already in the list of elements | ||||
|  *                -> do nothing | ||||
|  *        b. otherwise | ||||
|  *                i.   remember current # of elements | ||||
|  *                     (i.e. the size of the current subgroup) | ||||
|  *                ii.  add all current elements (which includes | ||||
|  *                     the identity) each multiplied from right | ||||
|  *                     with the current generator to the group | ||||
|  *                iii. add all remaining cosets that are generated | ||||
|  *                     by products of the new generator with itself | ||||
|  *                     and all other generators seen so far | ||||
|  * | ||||
|  * In functional form, this is implemented as a long set of recursive | ||||
|  * templates that have a complicated relationship. | ||||
|  * | ||||
|  * The main interface for Dimino's algorithm is the template | ||||
|  * enumerate_group_elements. All lists are implemented as variadic | ||||
|  * type_list<typename...> and numeric_list<typename = int, int...> | ||||
|  * templates. | ||||
|  * | ||||
|  * 'Calling' templates is usually done via typedefs. | ||||
|  * | ||||
|  * This implementation is an extended version of the basic algorithm. The | ||||
|  * extension consists in the fact that each group element has a set | ||||
|  * of flags associated with it. Multiplication of two group elements | ||||
|  * with each other results in a group element whose flags are the | ||||
|  * XOR of the flags of the previous elements. Each time the algorithm | ||||
|  * notices that a group element it just calculated is already in the | ||||
|  * list of current elements, the flags of both will be compared and | ||||
|  * added to the so-called 'global flags' of the group. | ||||
|  * | ||||
|  * The rationale behind this extension is that this allows not only | ||||
|  * for the description of symmetries between tensor indices, but | ||||
|  * also allows for the description of hermiticity, antisymmetry and | ||||
|  * antihermiticity. Negation and conjugation each correspond to a | ||||
|  * specific bit in the flags value, and if two different ways of | ||||
|  * reaching a group element lead to two different flags, this poses | ||||
|  * a constraint on the allowed values of the resulting tensor. For | ||||
|  * example, if a group element is reached both with and without the | ||||
|  * conjugation flag, it is clear that the resulting tensor has to be real. | ||||
|  * | ||||
|  * Note that this flag mechanism is quite generic and may have other | ||||
|  * uses beyond tensor properties. | ||||
|  * | ||||
|  * IMPORTANT:  | ||||
|  *     This algorithm assumes the group to be finite. If you try to | ||||
|  *     run it with a group that's infinite, the algorithm will only | ||||
|  *     terminate once you hit a compiler limit (max template depth). | ||||
|  *     Also note that trying to use this implementation to create a | ||||
|  *     very large group will probably either make you hit the same | ||||
|  *     limit, cause the compiler to segfault or at the very least | ||||
|  *     take a *really* long time (hours, days, weeks - sic!) to | ||||
|  *     compile. It is not recommended to plug in more than 4 | ||||
|  *     generators, unless they are independent of each other. | ||||
|  */ | ||||
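|  | ||||
| /* Editor's sketch (illustrative only, not part of the original header): the | ||||
|  * sequential algorithm above, written as ordinary run-time C++ for permutation | ||||
|  * groups and without the flag extension. DynamicSGroup::add() in | ||||
|  * TensorSymmetry/DynamicSymmetry.h plays this role at run time; the templates | ||||
|  * below are its compile-time analogue. | ||||
|  * | ||||
|  * \code | ||||
|  * #include <algorithm> | ||||
|  * #include <cstddef> | ||||
|  * #include <vector> | ||||
|  * | ||||
|  * typedef std::vector<int> Perm;                    // permutation of 0..n-1 | ||||
|  * | ||||
|  * inline Perm mul(const Perm& a, const Perm& b) {   // composition: (a*b)[i] = b[a[i]] | ||||
|  *   Perm r(a.size()); | ||||
|  *   for (std::size_t i = 0; i < a.size(); ++i) r[i] = b[a[i]]; | ||||
|  *   return r; | ||||
|  * } | ||||
|  * | ||||
|  * inline std::vector<Perm> dimino(const Perm& id, std::vector<Perm> gens) { | ||||
|  *   std::vector<Perm> elems(1, id);                                        // 1. add identity | ||||
|  *   gens.erase(std::remove(gens.begin(), gens.end(), id), gens.end());     // 2. drop identities | ||||
|  *   if (gens.empty()) return elems; | ||||
|  *   for (Perm p = gens[0]; p != id; p = mul(p, gens[0]))                   // 3. powers of first generator | ||||
|  *     elems.push_back(p); | ||||
|  *   for (std::size_t g = 1; g < gens.size(); ++g) {                        // 4. remaining generators | ||||
|  *     if (std::find(elems.begin(), elems.end(), gens[g]) != elems.end()) | ||||
|  *       continue;                                                          // 4a. already in the group | ||||
|  *     std::size_t subgroup = elems.size();                                 // 4b.i  current subgroup size | ||||
|  *     elems.push_back(gens[g]);                                            // 4b.ii first new coset | ||||
|  *     for (std::size_t i = 1; i < subgroup; ++i) | ||||
|  *       elems.push_back(mul(elems[i], gens[g])); | ||||
|  *     for (std::size_t rep = subgroup; rep < elems.size(); rep += subgroup) // 4b.iii remaining cosets | ||||
|  *       for (std::size_t h = 0; h <= g; ++h) { | ||||
|  *         Perm e = mul(elems[rep], gens[h]); | ||||
|  *         if (std::find(elems.begin(), elems.end(), e) == elems.end()) { | ||||
|  *           elems.push_back(e); | ||||
|  *           for (std::size_t i = 1; i < subgroup; ++i) | ||||
|  *             elems.push_back(mul(elems[i], e)); | ||||
|  *         } | ||||
|  *       } | ||||
|  *   } | ||||
|  *   return elems; | ||||
|  * } | ||||
|  * \endcode | ||||
|  */ | ||||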
|  | ||||
| /** \internal | ||||
|   * | ||||
|   * \class strip_identities | ||||
|   * \ingroup CXX11_TensorSymmetry_Module | ||||
|   * | ||||
|   * \brief Cleanse a list of group elements of the identity element | ||||
|   * | ||||
|   * This template is used to make a first pass through all initial | ||||
|   * generators of Dimino's algorithm and remove the identity | ||||
|   * elements. | ||||
|   * | ||||
|   * \sa enumerate_group_elements | ||||
|   */ | ||||
| template<template<typename, typename> class Equality, typename id, typename L> struct strip_identities; | ||||
|  | ||||
| template< | ||||
|   template<typename, typename> class Equality, | ||||
|   typename id, | ||||
|   typename t, | ||||
|   typename... ts | ||||
| > | ||||
| struct strip_identities<Equality, id, type_list<t, ts...>> | ||||
| { | ||||
|   typedef typename conditional< | ||||
|     Equality<id, t>::value, | ||||
|     typename strip_identities<Equality, id, type_list<ts...>>::type, | ||||
|     typename concat<type_list<t>, typename strip_identities<Equality, id, type_list<ts...>>::type>::type | ||||
|   >::type type; | ||||
|   constexpr static int global_flags = Equality<id, t>::global_flags | strip_identities<Equality, id, type_list<ts...>>::global_flags; | ||||
| }; | ||||
|  | ||||
| template< | ||||
|   template<typename, typename> class Equality, | ||||
|   typename id | ||||
|   EIGEN_TPL_PP_SPEC_HACK_DEFC(typename, ts) | ||||
| > | ||||
| struct strip_identities<Equality, id, type_list<EIGEN_TPL_PP_SPEC_HACK_USE(ts)>> | ||||
| { | ||||
|   typedef type_list<> type; | ||||
|   constexpr static int global_flags = 0; | ||||
| }; | ||||
|  | ||||
| /** \internal | ||||
|   * | ||||
|   * \class dimino_first_step_elements_helper  | ||||
|   * \ingroup CXX11_TensorSymmetry_Module | ||||
|   * | ||||
|   * \brief Recursive template that adds powers of the first generator to the list of group elements | ||||
|   * | ||||
|   * This template calls itself recursively to add powers of the first | ||||
|   * generator to the list of group elements. It stops if it reaches | ||||
|   * the identity element again. | ||||
|   * | ||||
|   * \sa enumerate_group_elements, dimino_first_step_elements | ||||
|   */ | ||||
| template< | ||||
|   template<typename, typename> class Multiply, | ||||
|   template<typename, typename> class Equality, | ||||
|   typename id, | ||||
|   typename g, | ||||
|   typename current_element, | ||||
|   typename elements, | ||||
|   bool dont_add_current_element   // = false | ||||
| > | ||||
| struct dimino_first_step_elements_helper | ||||
| #ifndef EIGEN_PARSED_BY_DOXYGEN | ||||
|   : // recursive inheritance is too difficult for Doxygen | ||||
|   public dimino_first_step_elements_helper< | ||||
|     Multiply, | ||||
|     Equality, | ||||
|     id, | ||||
|     g, | ||||
|     typename Multiply<current_element, g>::type, | ||||
|     typename concat<elements, type_list<current_element>>::type, | ||||
|     Equality<typename Multiply<current_element, g>::type, id>::value | ||||
|   > {}; | ||||
|  | ||||
| template< | ||||
|   template<typename, typename> class Multiply, | ||||
|   template<typename, typename> class Equality, | ||||
|   typename id, | ||||
|   typename g, | ||||
|   typename current_element, | ||||
|   typename elements | ||||
| > | ||||
| struct dimino_first_step_elements_helper<Multiply, Equality, id, g, current_element, elements, true> | ||||
| #endif // EIGEN_PARSED_BY_DOXYGEN | ||||
| { | ||||
|   typedef elements type; | ||||
|   constexpr static int global_flags = Equality<current_element, id>::global_flags; | ||||
| }; | ||||
|  | ||||
| /** \internal | ||||
|   * | ||||
|   * \class dimino_first_step_elements | ||||
|   * \ingroup CXX11_TensorSymmetry_Module | ||||
|   * | ||||
|   * \brief Add all powers of the first generator to the list of group elements | ||||
|   * | ||||
|   * This template takes the first non-identity generator and generates the initial | ||||
|   * list of elements which consists of all powers of that generator. For a group | ||||
|   * with just one generator, the whole group would be enumerated after this step. | ||||
|   * | ||||
|   * \sa enumerate_group_elements | ||||
|   */ | ||||
| template< | ||||
|   template<typename, typename> class Multiply, | ||||
|   template<typename, typename> class Equality, | ||||
|   typename id, | ||||
|   typename generators | ||||
| > | ||||
| struct dimino_first_step_elements | ||||
| { | ||||
|   typedef typename get<0, generators>::type first_generator; | ||||
|   typedef typename skip<1, generators>::type next_generators; | ||||
|   typedef type_list<first_generator> generators_done; | ||||
|  | ||||
|   typedef dimino_first_step_elements_helper< | ||||
|     Multiply, | ||||
|     Equality, | ||||
|     id, | ||||
|     first_generator, | ||||
|     first_generator, | ||||
|     type_list<id>, | ||||
|     false | ||||
|   > helper; | ||||
|   typedef typename helper::type type; | ||||
|   constexpr static int global_flags = helper::global_flags; | ||||
| }; | ||||
|  | ||||
| /** \internal | ||||
|   * | ||||
|   * \class dimino_get_coset_elements | ||||
|   * \ingroup CXX11_TensorSymmetry_Module | ||||
|   * | ||||
|   * \brief Generate all elements of a specific coset | ||||
|   * | ||||
|   * This template generates all the elements of a specific coset by | ||||
|   * multiplying all elements in the given subgroup with the new | ||||
|   * coset representative. Note that the first element of the | ||||
|   * subgroup is always the identity element, so the first element of | ||||
|   * the result of this template is going to be the coset | ||||
|   * representative itself. | ||||
|   * | ||||
|   * Note that this template accepts an additional boolean parameter | ||||
|   * that specifies whether to actually generate the coset (true) or | ||||
|   * just return an empty list (false). | ||||
|   * | ||||
|   * \sa enumerate_group_elements, dimino_add_cosets_for_rep | ||||
|   */ | ||||
| template< | ||||
|   template<typename, typename> class Multiply, | ||||
|   typename sub_group_elements, | ||||
|   typename new_coset_rep, | ||||
|   bool generate_coset      // = true | ||||
| > | ||||
| struct dimino_get_coset_elements | ||||
| { | ||||
|   typedef typename apply_op_from_right<Multiply, new_coset_rep, sub_group_elements>::type type; | ||||
| }; | ||||
|  | ||||
| template< | ||||
|   template<typename, typename> class Multiply, | ||||
|   typename sub_group_elements, | ||||
|   typename new_coset_rep | ||||
| > | ||||
| struct dimino_get_coset_elements<Multiply, sub_group_elements, new_coset_rep, false> | ||||
| { | ||||
|   typedef type_list<> type; | ||||
| }; | ||||
|  | ||||
| /** \internal | ||||
|   * | ||||
|   * \class dimino_add_cosets_for_rep | ||||
|   * \ingroup CXX11_TensorSymmetry_Module | ||||
|   * | ||||
|   * \brief Recursive template for adding coset spaces | ||||
|   * | ||||
|   * This template multiplies the coset representative with a generator | ||||
|   * from the list of previous generators. If the new element is not in | ||||
|   * the group already, it adds the corresponding coset. Finally it | ||||
|   * proceeds to call itself with the next generator from the list. | ||||
|   * | ||||
|   * \sa enumerate_group_elements, dimino_add_all_coset_spaces | ||||
|   */ | ||||
| template< | ||||
|   template<typename, typename> class Multiply, | ||||
|   template<typename, typename> class Equality, | ||||
|   typename id, | ||||
|   typename sub_group_elements, | ||||
|   typename elements, | ||||
|   typename generators, | ||||
|   typename rep_element, | ||||
|   int sub_group_size | ||||
| > | ||||
| struct dimino_add_cosets_for_rep; | ||||
|  | ||||
| template< | ||||
|   template<typename, typename> class Multiply, | ||||
|   template<typename, typename> class Equality, | ||||
|   typename id, | ||||
|   typename sub_group_elements, | ||||
|   typename elements, | ||||
|   typename g, | ||||
|   typename... gs, | ||||
|   typename rep_element, | ||||
|   int sub_group_size | ||||
| > | ||||
| struct dimino_add_cosets_for_rep<Multiply, Equality, id, sub_group_elements, elements, type_list<g, gs...>, rep_element, sub_group_size> | ||||
| { | ||||
|   typedef typename Multiply<rep_element, g>::type new_coset_rep; | ||||
|   typedef contained_in_list_gf<Equality, new_coset_rep, elements> _cil; | ||||
|   constexpr static bool add_coset = !_cil::value; | ||||
|  | ||||
|   typedef typename dimino_get_coset_elements< | ||||
|     Multiply, | ||||
|     sub_group_elements, | ||||
|     new_coset_rep, | ||||
|     add_coset | ||||
|   >::type coset_elements; | ||||
|  | ||||
|   typedef dimino_add_cosets_for_rep< | ||||
|     Multiply, | ||||
|     Equality, | ||||
|     id, | ||||
|     sub_group_elements, | ||||
|     typename concat<elements, coset_elements>::type, | ||||
|     type_list<gs...>, | ||||
|     rep_element, | ||||
|     sub_group_size | ||||
|   > _helper; | ||||
|  | ||||
|   typedef typename _helper::type type; | ||||
|   constexpr static int global_flags = _cil::global_flags | _helper::global_flags; | ||||
|  | ||||
|   /* Note that we don't have to update global flags here, since | ||||
|    * we will only add these elements if they are not part of | ||||
|    * the group already. But that only happens if the coset rep | ||||
|    * is not already in the group, so the check for the coset rep | ||||
|    * will catch this. | ||||
|    */ | ||||
| }; | ||||
|  | ||||
| template< | ||||
|   template<typename, typename> class Multiply, | ||||
|   template<typename, typename> class Equality, | ||||
|   typename id, | ||||
|   typename sub_group_elements, | ||||
|   typename elements | ||||
|   EIGEN_TPL_PP_SPEC_HACK_DEFC(typename, empty), | ||||
|   typename rep_element, | ||||
|   int sub_group_size | ||||
| > | ||||
| struct dimino_add_cosets_for_rep<Multiply, Equality, id, sub_group_elements, elements, type_list<EIGEN_TPL_PP_SPEC_HACK_USE(empty)>, rep_element, sub_group_size> | ||||
| { | ||||
|   typedef elements type; | ||||
|   constexpr static int global_flags = 0; | ||||
| }; | ||||
|  | ||||
| /** \internal | ||||
|   * | ||||
|   * \class dimino_add_all_coset_spaces | ||||
|   * \ingroup CXX11_TensorSymmetry_Module | ||||
|   * | ||||
|   * \brief Recursive template for adding all coset spaces for a new generator | ||||
|   * | ||||
|   * This template tries to go through the list of generators (with | ||||
|   * the help of the dimino_add_cosets_for_rep template) as long as | ||||
|   * it still finds elements that are not part of the group and add | ||||
|   * the corresponding cosets. | ||||
|   * | ||||
|   * \sa enumerate_group_elements, dimino_add_cosets_for_rep | ||||
|   */ | ||||
| template< | ||||
|   template<typename, typename> class Multiply, | ||||
|   template<typename, typename> class Equality, | ||||
|   typename id, | ||||
|   typename sub_group_elements, | ||||
|   typename elements, | ||||
|   typename generators, | ||||
|   int sub_group_size, | ||||
|   int rep_pos, | ||||
|   bool stop_condition        // = false | ||||
| > | ||||
| struct dimino_add_all_coset_spaces | ||||
| { | ||||
|   typedef typename get<rep_pos, elements>::type rep_element; | ||||
|   typedef dimino_add_cosets_for_rep< | ||||
|     Multiply, | ||||
|     Equality, | ||||
|     id, | ||||
|     sub_group_elements, | ||||
|     elements, | ||||
|     generators, | ||||
|     rep_element, | ||||
|     sub_group_elements::count | ||||
|   > _ac4r; | ||||
|   typedef typename _ac4r::type new_elements; | ||||
|    | ||||
|   constexpr static int new_rep_pos = rep_pos + sub_group_elements::count; | ||||
|   constexpr static bool new_stop_condition = new_rep_pos >= new_elements::count; | ||||
|  | ||||
|   typedef dimino_add_all_coset_spaces< | ||||
|     Multiply, | ||||
|     Equality, | ||||
|     id, | ||||
|     sub_group_elements, | ||||
|     new_elements, | ||||
|     generators, | ||||
|     sub_group_size, | ||||
|     new_rep_pos, | ||||
|     new_stop_condition | ||||
|   > _helper; | ||||
|  | ||||
|   typedef typename _helper::type type; | ||||
|   constexpr static int global_flags = _helper::global_flags | _ac4r::global_flags; | ||||
| }; | ||||
|  | ||||
| template< | ||||
|   template<typename, typename> class Multiply, | ||||
|   template<typename, typename> class Equality, | ||||
|   typename id, | ||||
|   typename sub_group_elements, | ||||
|   typename elements, | ||||
|   typename generators, | ||||
|   int sub_group_size, | ||||
|   int rep_pos | ||||
| > | ||||
| struct dimino_add_all_coset_spaces<Multiply, Equality, id, sub_group_elements, elements, generators, sub_group_size, rep_pos, true> | ||||
| { | ||||
|   typedef elements type; | ||||
|   constexpr static int global_flags = 0; | ||||
| }; | ||||
|  | ||||
| /** \internal | ||||
|   * | ||||
|   * \class dimino_add_generator | ||||
|   * \ingroup CXX11_TensorSymmetry_Module | ||||
|   * | ||||
|   * \brief Enlarge the group by adding a new generator. | ||||
|   * | ||||
|   * It accepts a boolean parameter that determines if the generator is redundant, | ||||
|   * i.e. was already seen in the group. In that case, it reduces to a no-op. | ||||
|   * | ||||
|   * \sa enumerate_group_elements, dimino_add_all_coset_spaces | ||||
|   */ | ||||
| template< | ||||
|   template<typename, typename> class Multiply, | ||||
|   template<typename, typename> class Equality, | ||||
|   typename id, | ||||
|   typename elements, | ||||
|   typename generators_done, | ||||
|   typename current_generator, | ||||
|   bool redundant          // = false | ||||
| > | ||||
| struct dimino_add_generator | ||||
| { | ||||
|   /* this template is only called if the generator is not redundant | ||||
|    * => all elements of the group multiplied with the new generator | ||||
|    *    are going to be new elements of the most trivial coset space | ||||
|    */ | ||||
|   typedef typename apply_op_from_right<Multiply, current_generator, elements>::type multiplied_elements; | ||||
|   typedef typename concat<elements, multiplied_elements>::type new_elements; | ||||
|  | ||||
|   constexpr static int rep_pos = elements::count; | ||||
|  | ||||
|   typedef dimino_add_all_coset_spaces< | ||||
|     Multiply, | ||||
|     Equality, | ||||
|     id, | ||||
|     elements, // elements of previous subgroup | ||||
|     new_elements, | ||||
|     typename concat<generators_done, type_list<current_generator>>::type, | ||||
|     elements::count, // size of previous subgroup | ||||
|     rep_pos, | ||||
|     false // don't stop (because rep_pos >= new_elements::count is always false at this point) | ||||
|   > _helper; | ||||
|   typedef typename _helper::type type; | ||||
|   constexpr static int global_flags = _helper::global_flags; | ||||
| }; | ||||
|  | ||||
| template< | ||||
|   template<typename, typename> class Multiply, | ||||
|   template<typename, typename> class Equality, | ||||
|   typename id, | ||||
|   typename elements, | ||||
|   typename generators_done, | ||||
|   typename current_generator | ||||
| > | ||||
| struct dimino_add_generator<Multiply, Equality, id, elements, generators_done, current_generator, true> | ||||
| { | ||||
|   // redundant case | ||||
|   typedef elements type; | ||||
|   constexpr static int global_flags = 0; | ||||
| }; | ||||
|  | ||||
| /** \internal | ||||
|   * | ||||
|   * \class dimino_add_remaining_generators | ||||
|   * \ingroup CXX11_TensorSymmetry_Module | ||||
|   * | ||||
|   * \brief Recursive template that adds all remaining generators to a group | ||||
|   * | ||||
|   * Loop through the list of generators that remain and successively | ||||
|   * add them to the group. | ||||
|   * | ||||
|   * \sa enumerate_group_elements, dimino_add_generator | ||||
|   */ | ||||
| template< | ||||
|   template<typename, typename> class Multiply, | ||||
|   template<typename, typename> class Equality, | ||||
|   typename id, | ||||
|   typename generators_done, | ||||
|   typename remaining_generators, | ||||
|   typename elements | ||||
| > | ||||
| struct dimino_add_remaining_generators | ||||
| { | ||||
|   typedef typename get<0, remaining_generators>::type first_generator; | ||||
|   typedef typename skip<1, remaining_generators>::type next_generators; | ||||
|  | ||||
|   typedef contained_in_list_gf<Equality, first_generator, elements> _cil; | ||||
|  | ||||
|   typedef dimino_add_generator< | ||||
|     Multiply, | ||||
|     Equality, | ||||
|     id, | ||||
|     elements, | ||||
|     generators_done, | ||||
|     first_generator, | ||||
|     _cil::value | ||||
|   > _helper; | ||||
|  | ||||
|   typedef typename _helper::type new_elements; | ||||
|  | ||||
|   typedef dimino_add_remaining_generators< | ||||
|     Multiply, | ||||
|     Equality, | ||||
|     id, | ||||
|     typename concat<generators_done, type_list<first_generator>>::type, | ||||
|     next_generators, | ||||
|     new_elements | ||||
|   > _next_iter; | ||||
|  | ||||
|   typedef typename _next_iter::type type; | ||||
|   constexpr static int global_flags = | ||||
|     _cil::global_flags | | ||||
|     _helper::global_flags | | ||||
|     _next_iter::global_flags; | ||||
| }; | ||||
|  | ||||
| template< | ||||
|   template<typename, typename> class Multiply, | ||||
|   template<typename, typename> class Equality, | ||||
|   typename id, | ||||
|   typename generators_done, | ||||
|   typename elements | ||||
| > | ||||
| struct dimino_add_remaining_generators<Multiply, Equality, id, generators_done, type_list<>, elements> | ||||
| { | ||||
|   typedef elements type; | ||||
|   constexpr static int global_flags = 0; | ||||
| }; | ||||
|  | ||||
| /** \internal | ||||
|   * | ||||
|   * \class enumerate_group_elements_noid | ||||
|   * \ingroup CXX11_TensorSymmetry_Module | ||||
|   * | ||||
|   * \brief Helper template that implements group element enumeration | ||||
|   * | ||||
|   * This is a helper template that implements the actual enumeration | ||||
|   * of group elements. This has been split so that the list of | ||||
|   * generators can be cleansed of the identity element before | ||||
|   * performing the actual operation. | ||||
|   * | ||||
|   * \sa enumerate_group_elements | ||||
|   */ | ||||
| template< | ||||
|   template<typename, typename> class Multiply, | ||||
|   template<typename, typename> class Equality, | ||||
|   typename id, | ||||
|   typename generators, | ||||
|   int initial_global_flags = 0 | ||||
| > | ||||
| struct enumerate_group_elements_noid | ||||
| { | ||||
|   typedef dimino_first_step_elements<Multiply, Equality, id, generators> first_step; | ||||
|   typedef typename first_step::type first_step_elements; | ||||
|  | ||||
|   typedef dimino_add_remaining_generators< | ||||
|     Multiply, | ||||
|     Equality, | ||||
|     id, | ||||
|     typename first_step::generators_done, | ||||
|     typename first_step::next_generators, // remaining_generators | ||||
|     typename first_step::type // first_step elements | ||||
|   > _helper; | ||||
|  | ||||
|   typedef typename _helper::type type; | ||||
|   constexpr static int global_flags = | ||||
|     initial_global_flags | | ||||
|     first_step::global_flags | | ||||
|     _helper::global_flags; | ||||
| }; | ||||
|  | ||||
| // in case when no generators are specified | ||||
| template< | ||||
|   template<typename, typename> class Multiply, | ||||
|   template<typename, typename> class Equality, | ||||
|   typename id, | ||||
|   int initial_global_flags | ||||
| > | ||||
| struct enumerate_group_elements_noid<Multiply, Equality, id, type_list<>, initial_global_flags> | ||||
| { | ||||
|   typedef type_list<id> type; | ||||
|   constexpr static int global_flags = initial_global_flags; | ||||
| }; | ||||
|  | ||||
| /** \internal | ||||
|   * | ||||
|   * \class enumerate_group_elements | ||||
|   * \ingroup CXX11_TensorSymmetry_Module | ||||
|   * | ||||
|   * \brief Enumerate all elements in a finite group | ||||
|   * | ||||
|   * This template enumerates all elements in a finite group. It accepts | ||||
|   * the following template parameters: | ||||
|   * | ||||
|   * \tparam Multiply      The multiplication operation that multiplies two group elements | ||||
|   *                       with each other. | ||||
|   * \tparam Equality      The equality check operation that checks if two group elements | ||||
|   *                       are equal to one another. | ||||
|   * \tparam id            The identity element | ||||
|   * \tparam _generators   A list of (possibly redundant) generators of the group | ||||
|   */ | ||||
| template< | ||||
|   template<typename, typename> class Multiply, | ||||
|   template<typename, typename> class Equality, | ||||
|   typename id, | ||||
|   typename _generators | ||||
| > | ||||
| struct enumerate_group_elements | ||||
|   : public enumerate_group_elements_noid< | ||||
|       Multiply, | ||||
|       Equality, | ||||
|       id, | ||||
|       typename strip_identities<Equality, id, _generators>::type, | ||||
|       strip_identities<Equality, id, _generators>::global_flags | ||||
|     > | ||||
| { | ||||
| }; | ||||
|  | ||||
| } // end namespace group_theory | ||||
|  | ||||
| } // end namespace internal | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11_TENSORSYMMETRY_TEMPLATEGROUPTHEORY_H | ||||
|  | ||||
| /* | ||||
|  * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; | ||||
|  */ | ||||
							
								
								
									
233  external/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h  vendored  Normal file
									
								
							| @@ -0,0 +1,233 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_THREADPOOL_EVENTCOUNT_H_ | ||||
| #define EIGEN_CXX11_THREADPOOL_EVENTCOUNT_H_ | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| // EventCount allows waiting for arbitrary predicates in non-blocking | ||||
| // algorithms. Think of it as a condition variable, but the wait predicate does not | ||||
| // need to be protected by a mutex. Usage: | ||||
| // Waiting thread does: | ||||
| // | ||||
| //   if (predicate) | ||||
| //     return act(); | ||||
| //   EventCount::Waiter& w = waiters[my_index]; | ||||
| //   ec.Prewait(&w); | ||||
| //   if (predicate) { | ||||
| //     ec.CancelWait(&w); | ||||
| //     return act(); | ||||
| //   } | ||||
| //   ec.CommitWait(&w); | ||||
| // | ||||
| // Notifying thread does: | ||||
| // | ||||
| //   predicate = true; | ||||
| //   ec.Notify(true); | ||||
| // | ||||
| // Notify is cheap if there are no waiting threads. Prewait/CommitWait are not | ||||
| // cheap, but they are executed only if the preceding predicate check has | ||||
| // failed. | ||||
| // | ||||
| // Algorithm outline: | ||||
| // There are two main variables: predicate (managed by user) and state_. | ||||
| // Operation closely resembles Dekker's mutual exclusion algorithm: | ||||
| // https://en.wikipedia.org/wiki/Dekker%27s_algorithm | ||||
| // The waiting thread sets state_ and then checks the predicate; the notifying thread | ||||
| // sets the predicate and then checks state_. Due to the seq_cst fences in between these | ||||
| // operations it is guaranteed that either the waiter will see the predicate change | ||||
| // and won't block, or the notifying thread will see the state_ change and will unblock | ||||
| // the waiter, or both. But it can't happen that both threads miss each other's | ||||
| // changes, which would lead to deadlock. | ||||
| class EventCount { | ||||
|  public: | ||||
|   class Waiter; | ||||
|  | ||||
|   EventCount(MaxSizeVector<Waiter>& waiters) : waiters_(waiters) { | ||||
|     eigen_assert(waiters.size() < (1 << kWaiterBits) - 1); | ||||
|     // Initialize epoch to something close to overflow to test overflow. | ||||
|     state_ = kStackMask | (kEpochMask - kEpochInc * waiters.size() * 2); | ||||
|   } | ||||
|  | ||||
|   ~EventCount() { | ||||
|     // Ensure there are no waiters. | ||||
|     eigen_assert((state_.load() & (kStackMask | kWaiterMask)) == kStackMask); | ||||
|   } | ||||
|  | ||||
|   // Prewait prepares for waiting. | ||||
|   // After calling this function the thread must re-check the wait predicate | ||||
|   // and call either CancelWait or CommitWait passing the same Waiter object. | ||||
|   void Prewait(Waiter* w) { | ||||
|     w->epoch = state_.fetch_add(kWaiterInc, std::memory_order_relaxed); | ||||
|     std::atomic_thread_fence(std::memory_order_seq_cst); | ||||
|   } | ||||
|  | ||||
|   // CommitWait commits waiting. | ||||
|   void CommitWait(Waiter* w) { | ||||
|     w->state = Waiter::kNotSignaled; | ||||
|     // Modification epoch of this waiter. | ||||
|     uint64_t epoch = | ||||
|         (w->epoch & kEpochMask) + | ||||
|         (((w->epoch & kWaiterMask) >> kWaiterShift) << kEpochShift); | ||||
|     uint64_t state = state_.load(std::memory_order_seq_cst); | ||||
|     for (;;) { | ||||
|       if (int64_t((state & kEpochMask) - epoch) < 0) { | ||||
|         // The preceding waiter has not decided on its fate. Wait until it | ||||
|         // calls either CancelWait or CommitWait, or is notified. | ||||
|         EIGEN_THREAD_YIELD(); | ||||
|         state = state_.load(std::memory_order_seq_cst); | ||||
|         continue; | ||||
|       } | ||||
|       // We've already been notified. | ||||
|       if (int64_t((state & kEpochMask) - epoch) > 0) return; | ||||
|       // Remove this thread from prewait counter and add it to the waiter list. | ||||
|       eigen_assert((state & kWaiterMask) != 0); | ||||
|       uint64_t newstate = state - kWaiterInc + kEpochInc; | ||||
|       newstate = (newstate & ~kStackMask) | (w - &waiters_[0]); | ||||
|       if ((state & kStackMask) == kStackMask) | ||||
|         w->next.store(nullptr, std::memory_order_relaxed); | ||||
|       else | ||||
|         w->next.store(&waiters_[state & kStackMask], std::memory_order_relaxed); | ||||
|       if (state_.compare_exchange_weak(state, newstate, | ||||
|                                        std::memory_order_release)) | ||||
|         break; | ||||
|     } | ||||
|     Park(w); | ||||
|   } | ||||
|  | ||||
|   // CancelWait cancels effects of the previous Prewait call. | ||||
|   void CancelWait(Waiter* w) { | ||||
|     uint64_t epoch = | ||||
|         (w->epoch & kEpochMask) + | ||||
|         (((w->epoch & kWaiterMask) >> kWaiterShift) << kEpochShift); | ||||
|     uint64_t state = state_.load(std::memory_order_relaxed); | ||||
|     for (;;) { | ||||
|       if (int64_t((state & kEpochMask) - epoch) < 0) { | ||||
|         // The preceding waiter has not decided on its fate. Wait until it | ||||
|         // calls either CancelWait or CommitWait, or is notified. | ||||
|         EIGEN_THREAD_YIELD(); | ||||
|         state = state_.load(std::memory_order_relaxed); | ||||
|         continue; | ||||
|       } | ||||
|       // We've already been notified. | ||||
|       if (int64_t((state & kEpochMask) - epoch) > 0) return; | ||||
|       // Remove this thread from prewait counter. | ||||
|       eigen_assert((state & kWaiterMask) != 0); | ||||
|       if (state_.compare_exchange_weak(state, state - kWaiterInc + kEpochInc, | ||||
|                                        std::memory_order_relaxed)) | ||||
|         return; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   // Notify wakes one or all waiting threads. | ||||
|   // Must be called after changing the associated wait predicate. | ||||
|   void Notify(bool all) { | ||||
|     std::atomic_thread_fence(std::memory_order_seq_cst); | ||||
|     uint64_t state = state_.load(std::memory_order_acquire); | ||||
|     for (;;) { | ||||
|       // Easy case: no waiters. | ||||
|       if ((state & kStackMask) == kStackMask && (state & kWaiterMask) == 0) | ||||
|         return; | ||||
|       uint64_t waiters = (state & kWaiterMask) >> kWaiterShift; | ||||
|       uint64_t newstate; | ||||
|       if (all) { | ||||
|         // Reset prewait counter and empty wait list. | ||||
|         newstate = (state & kEpochMask) + (kEpochInc * waiters) + kStackMask; | ||||
|       } else if (waiters) { | ||||
|         // There is a thread in pre-wait state, unblock it. | ||||
|         newstate = state + kEpochInc - kWaiterInc; | ||||
|       } else { | ||||
|         // Pop a waiter from list and unpark it. | ||||
|         Waiter* w = &waiters_[state & kStackMask]; | ||||
|         Waiter* wnext = w->next.load(std::memory_order_relaxed); | ||||
|         uint64_t next = kStackMask; | ||||
|         if (wnext != nullptr) next = wnext - &waiters_[0]; | ||||
|         // Note: we don't add kEpochInc here. ABA problem on the lock-free stack | ||||
|         // can't happen because a waiter is re-pushed onto the stack only after | ||||
|         // it was in the pre-wait state which inevitably leads to epoch | ||||
|         // increment. | ||||
|         newstate = (state & kEpochMask) + next; | ||||
|       } | ||||
|       if (state_.compare_exchange_weak(state, newstate, | ||||
|                                        std::memory_order_acquire)) { | ||||
|         if (!all && waiters) return;  // unblocked pre-wait thread | ||||
|         if ((state & kStackMask) == kStackMask) return; | ||||
|         Waiter* w = &waiters_[state & kStackMask]; | ||||
|         if (!all) w->next.store(nullptr, std::memory_order_relaxed); | ||||
|         Unpark(w); | ||||
|         return; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   class Waiter { | ||||
|     friend class EventCount; | ||||
|     // Align to 128 byte boundary to prevent false sharing with other Waiter objects in the same vector. | ||||
|     EIGEN_ALIGN_TO_BOUNDARY(128) std::atomic<Waiter*> next; | ||||
|     std::mutex mu; | ||||
|     std::condition_variable cv; | ||||
|     uint64_t epoch; | ||||
|     unsigned state; | ||||
|     enum { | ||||
|       kNotSignaled, | ||||
|       kWaiting, | ||||
|       kSignaled, | ||||
|     }; | ||||
|   }; | ||||
|  | ||||
|  private: | ||||
|   // State_ layout: | ||||
|   // - the low kStackBits bits form a stack of waiters that have committed to waiting. | ||||
|   // - the next kWaiterBits bits count the waiters in the prewait state. | ||||
|   // - the next kEpochBits bits form a modification counter. | ||||
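|   // With the constants below, that is: bits [0,16) hold the stack index, | ||||
|   // bits [16,32) hold the prewait counter, and bits [32,64) hold the epoch. | ||||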
|   static const uint64_t kStackBits = 16; | ||||
|   static const uint64_t kStackMask = (1ull << kStackBits) - 1; | ||||
|   static const uint64_t kWaiterBits = 16; | ||||
|   static const uint64_t kWaiterShift = 16; | ||||
|   static const uint64_t kWaiterMask = ((1ull << kWaiterBits) - 1) | ||||
|                                       << kWaiterShift; | ||||
|   static const uint64_t kWaiterInc = 1ull << kWaiterBits; | ||||
|   static const uint64_t kEpochBits = 32; | ||||
|   static const uint64_t kEpochShift = 32; | ||||
|   static const uint64_t kEpochMask = ((1ull << kEpochBits) - 1) << kEpochShift; | ||||
|   static const uint64_t kEpochInc = 1ull << kEpochShift; | ||||
|   std::atomic<uint64_t> state_; | ||||
|   MaxSizeVector<Waiter>& waiters_; | ||||
|  | ||||
|   void Park(Waiter* w) { | ||||
|     std::unique_lock<std::mutex> lock(w->mu); | ||||
|     while (w->state != Waiter::kSignaled) { | ||||
|       w->state = Waiter::kWaiting; | ||||
|       w->cv.wait(lock); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   void Unpark(Waiter* waiters) { | ||||
|     Waiter* next = nullptr; | ||||
|     for (Waiter* w = waiters; w; w = next) { | ||||
|       next = w->next.load(std::memory_order_relaxed); | ||||
|       unsigned state; | ||||
|       { | ||||
|         std::unique_lock<std::mutex> lock(w->mu); | ||||
|         state = w->state; | ||||
|         w->state = Waiter::kSignaled; | ||||
|       } | ||||
|       // Avoid notifying if it wasn't waiting. | ||||
|       if (state == Waiter::kWaiting) w->cv.notify_one(); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   EventCount(const EventCount&) = delete; | ||||
|   void operator=(const EventCount&) = delete; | ||||
| }; | ||||
|  | ||||
| }  // namespace Eigen | ||||
|  | ||||
| #endif  // EIGEN_CXX11_THREADPOOL_EVENTCOUNT_H_ | ||||
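The header comment above sketches the waiter/notifier protocol in pseudocode; the snippet below is a minimal, hedged illustration of the same protocol with a single waiting thread. The predicate flag and the thread bodies are illustrative only, and it assumes the umbrella header <unsupported/Eigen/CXX11/ThreadPool> (which pulls in EventCount and MaxSizeVector) is on the include path.

#include <atomic>
#include <thread>
#include <unsupported/Eigen/CXX11/ThreadPool>

int main() {
  std::atomic<bool> predicate(false);
  Eigen::MaxSizeVector<Eigen::EventCount::Waiter> waiters(1);
  Eigen::EventCount ec(waiters);
  waiters.resize(1);  // same construction order as NonBlockingThreadPoolTempl

  std::thread waiter([&] {
    if (predicate.load()) return;        // fast path: no need to block
    Eigen::EventCount::Waiter* w = &waiters[0];
    ec.Prewait(w);                       // announce the intent to block
    if (predicate.load()) {              // re-check the predicate after Prewait
      ec.CancelWait(w);
      return;
    }
    ec.CommitWait(w);                    // park until a Notify arrives
  });

  predicate.store(true);                 // publish the predicate first...
  ec.Notify(true);                       // ...then wake the waiter(s)
  waiter.join();
  return 0;
}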
							
								
								
									
274  external/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h  vendored  Normal file
									
								
							| @@ -0,0 +1,274 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_THREADPOOL_NONBLOCKING_THREAD_POOL_H | ||||
| #define EIGEN_CXX11_THREADPOOL_NONBLOCKING_THREAD_POOL_H | ||||
|  | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| template <typename Environment> | ||||
| class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface { | ||||
|  public: | ||||
|   typedef typename Environment::Task Task; | ||||
|   typedef RunQueue<Task, 1024> Queue; | ||||
|  | ||||
|   NonBlockingThreadPoolTempl(int num_threads, Environment env = Environment()) | ||||
|       : env_(env), | ||||
|         threads_(num_threads), | ||||
|         queues_(num_threads), | ||||
|         coprimes_(num_threads), | ||||
|         waiters_(num_threads), | ||||
|         blocked_(0), | ||||
|         spinning_(0), | ||||
|         done_(false), | ||||
|         ec_(waiters_) { | ||||
|     waiters_.resize(num_threads); | ||||
|  | ||||
|     // Calculate coprimes of num_threads. | ||||
|     // Coprimes are used for a random walk over all threads in Steal | ||||
|     // and NonEmptyQueueIndex. Iteration is based on the fact that if we take | ||||
|     // a walk starting at thread index t and calculate num_threads - 1 subsequent | ||||
|     // indices as (t + coprime) % num_threads, we will cover all threads without | ||||
|     // repetitions (effectively getting a pseudo-random permutation of thread | ||||
|     // indices). | ||||
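|     // For example, with num_threads = 6 the coprimes are {1, 5}; starting at | ||||
|     // t = 2 with increment 5 visits 2, 1, 0, 5, 4, 3 -- every thread exactly once. | ||||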
|     for (int i = 1; i <= num_threads; i++) { | ||||
|       unsigned a = i; | ||||
|       unsigned b = num_threads; | ||||
|       // If GCD(a, b) == 1, then a and b are coprimes. | ||||
|       while (b != 0) { | ||||
|         unsigned tmp = a; | ||||
|         a = b; | ||||
|         b = tmp % b; | ||||
|       } | ||||
|       if (a == 1) { | ||||
|         coprimes_.push_back(i); | ||||
|       } | ||||
|     } | ||||
|     for (int i = 0; i < num_threads; i++) { | ||||
|       queues_.push_back(new Queue()); | ||||
|     } | ||||
|     for (int i = 0; i < num_threads; i++) { | ||||
|       threads_.push_back(env_.CreateThread([this, i]() { WorkerLoop(i); })); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   ~NonBlockingThreadPoolTempl() { | ||||
|     done_ = true; | ||||
|     // Now if all threads block without work, they will start exiting. | ||||
|     // But note that threads can continue to work arbitrarily long, | ||||
|     // block, submit new work, unblock and otherwise live a full life. | ||||
|     ec_.Notify(true); | ||||
|  | ||||
|     // Join threads explicitly to avoid destruction order issues. | ||||
|     for (size_t i = 0; i < threads_.size(); i++) delete threads_[i]; | ||||
|     for (size_t i = 0; i < threads_.size(); i++) delete queues_[i]; | ||||
|   } | ||||
|  | ||||
|   void Schedule(std::function<void()> fn) { | ||||
|     Task t = env_.CreateTask(std::move(fn)); | ||||
|     PerThread* pt = GetPerThread(); | ||||
|     if (pt->pool == this) { | ||||
|       // Worker thread of this pool, push onto the thread's queue. | ||||
|       Queue* q = queues_[pt->thread_id]; | ||||
|       t = q->PushFront(std::move(t)); | ||||
|     } else { | ||||
|       // A free-standing thread (or worker of another pool), push onto a random | ||||
|       // queue. | ||||
|       Queue* q = queues_[Rand(&pt->rand) % queues_.size()]; | ||||
|       t = q->PushBack(std::move(t)); | ||||
|     } | ||||
|     // Note: below we touch this after making t available to worker threads. | ||||
|     // Strictly speaking, this can lead to a racy use-after-free. Consider that | ||||
|     // Schedule is called from a thread that is neither the main thread nor a worker | ||||
|     // thread of this pool. Then, execution of t directly or indirectly | ||||
|     // completes the overall computation, which in turn leads to destruction of | ||||
|     // this. We expect that such a scenario is prevented by the program, that is, | ||||
|     // this is kept alive while any thread can potentially be inside Schedule. | ||||
|     if (!t.f) | ||||
|       ec_.Notify(false); | ||||
|     else | ||||
|       env_.ExecuteTask(t);  // Push failed, execute directly. | ||||
|   } | ||||
|  | ||||
|   int NumThreads() const final { | ||||
|     return static_cast<int>(threads_.size()); | ||||
|   } | ||||
|  | ||||
|   int CurrentThreadId() const final { | ||||
|     const PerThread* pt = | ||||
|         const_cast<NonBlockingThreadPoolTempl*>(this)->GetPerThread(); | ||||
|     if (pt->pool == this) { | ||||
|       return pt->thread_id; | ||||
|     } else { | ||||
|       return -1; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|  private: | ||||
|   typedef typename Environment::EnvThread Thread; | ||||
|  | ||||
|   struct PerThread { | ||||
|     constexpr PerThread() : pool(NULL), rand(0), thread_id(-1) { } | ||||
|     NonBlockingThreadPoolTempl* pool;  // Parent pool, or null for normal threads. | ||||
|     uint64_t rand;  // Random generator state. | ||||
|     int thread_id;  // Worker thread index in pool. | ||||
|   }; | ||||
|  | ||||
|   Environment env_; | ||||
|   MaxSizeVector<Thread*> threads_; | ||||
|   MaxSizeVector<Queue*> queues_; | ||||
|   MaxSizeVector<unsigned> coprimes_; | ||||
|   MaxSizeVector<EventCount::Waiter> waiters_; | ||||
|   std::atomic<unsigned> blocked_; | ||||
|   std::atomic<bool> spinning_; | ||||
|   std::atomic<bool> done_; | ||||
|   EventCount ec_; | ||||
|  | ||||
|   // Main worker thread loop. | ||||
|   void WorkerLoop(int thread_id) { | ||||
|     PerThread* pt = GetPerThread(); | ||||
|     pt->pool = this; | ||||
|     pt->rand = std::hash<std::thread::id>()(std::this_thread::get_id()); | ||||
|     pt->thread_id = thread_id; | ||||
|     Queue* q = queues_[thread_id]; | ||||
|     EventCount::Waiter* waiter = &waiters_[thread_id]; | ||||
|     for (;;) { | ||||
|       Task t = q->PopFront(); | ||||
|       if (!t.f) { | ||||
|         t = Steal(); | ||||
|         if (!t.f) { | ||||
|           // Leave one thread spinning. This reduces latency. | ||||
|           // TODO(dvyukov): 1000 iterations is based on fair dice roll, tune it. | ||||
|           // Also, the time it takes to attempt to steal work 1000 times depends | ||||
|           // on the size of the thread pool. However the speed at which the user | ||||
|           // of the thread pool submits tasks is independent of the size of the | ||||
|           // pool. Consider a time based limit instead. | ||||
|           if (!spinning_ && !spinning_.exchange(true)) { | ||||
|             for (int i = 0; i < 1000 && !t.f; i++) { | ||||
|               t = Steal(); | ||||
|             } | ||||
|             spinning_ = false; | ||||
|           } | ||||
|           if (!t.f) { | ||||
|             if (!WaitForWork(waiter, &t)) { | ||||
|               return; | ||||
|             } | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|       if (t.f) { | ||||
|         env_.ExecuteTask(t); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   // Steal tries to steal work from other worker threads in best-effort manner. | ||||
|   Task Steal() { | ||||
|     PerThread* pt = GetPerThread(); | ||||
|     const size_t size = queues_.size(); | ||||
|     unsigned r = Rand(&pt->rand); | ||||
|     unsigned inc = coprimes_[r % coprimes_.size()]; | ||||
|     unsigned victim = r % size; | ||||
|     for (unsigned i = 0; i < size; i++) { | ||||
|       Task t = queues_[victim]->PopBack(); | ||||
|       if (t.f) { | ||||
|         return t; | ||||
|       } | ||||
|       victim += inc; | ||||
|       if (victim >= size) { | ||||
|         victim -= size; | ||||
|       } | ||||
|     } | ||||
|     return Task(); | ||||
|   } | ||||
|  | ||||
|   // WaitForWork blocks until new work is available (returns true), or if it is | ||||
|   // time to exit (returns false). Can optionally return a task to execute in t | ||||
|   // (in such case t.f != nullptr on return). | ||||
|   bool WaitForWork(EventCount::Waiter* waiter, Task* t) { | ||||
|     eigen_assert(!t->f); | ||||
|     // We already did best-effort emptiness check in Steal, so prepare for | ||||
|     // blocking. | ||||
|     ec_.Prewait(waiter); | ||||
|     // Now do a reliable emptiness check. | ||||
|     int victim = NonEmptyQueueIndex(); | ||||
|     if (victim != -1) { | ||||
|       ec_.CancelWait(waiter); | ||||
|       *t = queues_[victim]->PopBack(); | ||||
|       return true; | ||||
|     } | ||||
|     // The number of blocked threads is used as the termination condition. | ||||
|     // If we are shutting down and all worker threads are blocked without work, | ||||
|     // that means we are done. | ||||
|     blocked_++; | ||||
|     if (done_ && blocked_ == threads_.size()) { | ||||
|       ec_.CancelWait(waiter); | ||||
|       // Almost done, but need to re-check queues. | ||||
|       // Consider that all queues are empty and all worker threads are preempted | ||||
|       // right after incrementing blocked_ above. Now a free-standing thread | ||||
|       // submits work and calls destructor (which sets done_). If we don't | ||||
|       // re-check queues, we will exit leaving the work unexecuted. | ||||
|       if (NonEmptyQueueIndex() != -1) { | ||||
|         // Note: we must not pop from queues before we decrement blocked_, | ||||
|         // otherwise the following scenario is possible. Consider that instead | ||||
|         // of checking for emptiness we popped the only element from queues. | ||||
|         // Now other worker threads can start exiting, which is bad if the | ||||
|         // work item submits other work. So we just check emptiness here, | ||||
|         // which ensures that all worker threads exit at the same time. | ||||
|         blocked_--; | ||||
|         return true; | ||||
|       } | ||||
|       // Reached stable termination state. | ||||
|       ec_.Notify(true); | ||||
|       return false; | ||||
|     } | ||||
|     ec_.CommitWait(waiter); | ||||
|     blocked_--; | ||||
|     return true; | ||||
|   } | ||||
|  | ||||
|   int NonEmptyQueueIndex() { | ||||
|     PerThread* pt = GetPerThread(); | ||||
|     const size_t size = queues_.size(); | ||||
|     unsigned r = Rand(&pt->rand); | ||||
|     unsigned inc = coprimes_[r % coprimes_.size()]; | ||||
|     unsigned victim = r % size; | ||||
|     for (unsigned i = 0; i < size; i++) { | ||||
|       if (!queues_[victim]->Empty()) { | ||||
|         return victim; | ||||
|       } | ||||
|       victim += inc; | ||||
|       if (victim >= size) { | ||||
|         victim -= size; | ||||
|       } | ||||
|     } | ||||
|     return -1; | ||||
|   } | ||||
|  | ||||
|   static EIGEN_STRONG_INLINE PerThread* GetPerThread() { | ||||
|     EIGEN_THREAD_LOCAL PerThread per_thread_; | ||||
|     PerThread* pt = &per_thread_; | ||||
|     return pt; | ||||
|   } | ||||
|  | ||||
|   static EIGEN_STRONG_INLINE unsigned Rand(uint64_t* state) { | ||||
|     uint64_t current = *state; | ||||
|     // Update the internal state | ||||
|     *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL; | ||||
|     // Generate the random output (using the PCG-XSH-RS scheme) | ||||
|     return static_cast<unsigned>((current ^ (current >> 22)) >> (22 + (current >> 61))); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| typedef NonBlockingThreadPoolTempl<StlThreadEnvironment> NonBlockingThreadPool; | ||||
|  | ||||
| }  // namespace Eigen | ||||
|  | ||||
| #endif  // EIGEN_CXX11_THREADPOOL_NONBLOCKING_THREAD_POOL_H | ||||
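A brief, hedged usage sketch for the pool defined above (not part of the vendored file): it schedules a batch of small tasks on the StlThreadEnvironment-based NonBlockingThreadPool typedef and waits for them with an atomic counter; a real program would use a proper completion primitive instead of the busy-wait.

#include <atomic>
#include <thread>
#include <unsupported/Eigen/CXX11/ThreadPool>

int main() {
  Eigen::NonBlockingThreadPool pool(4);   // four worker threads
  std::atomic<int> remaining(100);

  for (int i = 0; i < 100; ++i) {
    pool.Schedule([&remaining] { remaining.fetch_sub(1); });
  }

  while (remaining.load() != 0) {         // crude wait for completion
    std::this_thread::yield();
  }
  return 0;  // the destructor wakes and joins the workers
}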
							
								
								
									
210  external/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h  vendored  Normal file
									
								
							| @@ -0,0 +1,210 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_THREADPOOL_RUNQUEUE_H_ | ||||
| #define EIGEN_CXX11_THREADPOOL_RUNQUEUE_H_ | ||||
|  | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| // RunQueue is a fixed-size, partially non-blocking deque of Work items. | ||||
| // Operations on the front of the queue must be done by a single thread (owner); | ||||
| // operations on the back of the queue can be done by multiple threads concurrently. | ||||
| // | ||||
| // Algorithm outline: | ||||
| // All remote threads operating on the queue back are serialized by a mutex. | ||||
| // This ensures that at most two threads access state: owner and one remote | ||||
| // thread (Size aside). The algorithm ensures that the occupied region of the | ||||
| // underlying array is logically contiguous (it can wrap around, but there are no stray | ||||
| // occupied elements). The owner operates on one end of this region, the remote thread | ||||
| // operates on the other end. Synchronization between these threads | ||||
| // (potential consumption of the last element and take-up of the last empty | ||||
| // element) happens by means of a state variable in each element. States are: | ||||
| // empty, busy (in the process of insertion or removal) and ready. Threads claim | ||||
| // elements (empty->busy and ready->busy transitions) by means of a CAS | ||||
| // operation. The finishing transitions (busy->empty and busy->ready) are done | ||||
| // with plain store as the element is exclusively owned by the current thread. | ||||
| // | ||||
| // Note: we could permit only pointers as elements; then we would not need a | ||||
| // separate state variable, as a null/non-null pointer value would serve as the state, | ||||
| // but that would require malloc/free per operation for large, complex values | ||||
| // (and this is designed to store std::function<void()>). | ||||
| template <typename Work, unsigned kSize> | ||||
| class RunQueue { | ||||
|  public: | ||||
|   RunQueue() : front_(0), back_(0) { | ||||
|     // require power-of-two for fast masking | ||||
|     eigen_assert((kSize & (kSize - 1)) == 0); | ||||
|     eigen_assert(kSize > 2);            // why would you do this? | ||||
|     eigen_assert(kSize <= (64 << 10));  // leave enough space for counter | ||||
|     for (unsigned i = 0; i < kSize; i++) | ||||
|       array_[i].state.store(kEmpty, std::memory_order_relaxed); | ||||
|   } | ||||
|  | ||||
|   ~RunQueue() { eigen_assert(Size() == 0); } | ||||
|  | ||||
|   // PushFront inserts w at the beginning of the queue. | ||||
|   // If queue is full returns w, otherwise returns default-constructed Work. | ||||
|   Work PushFront(Work w) { | ||||
|     unsigned front = front_.load(std::memory_order_relaxed); | ||||
|     Elem* e = &array_[front & kMask]; | ||||
|     uint8_t s = e->state.load(std::memory_order_relaxed); | ||||
|     if (s != kEmpty || | ||||
|         !e->state.compare_exchange_strong(s, kBusy, std::memory_order_acquire)) | ||||
|       return w; | ||||
|     front_.store(front + 1 + (kSize << 1), std::memory_order_relaxed); | ||||
|     e->w = std::move(w); | ||||
|     e->state.store(kReady, std::memory_order_release); | ||||
|     return Work(); | ||||
|   } | ||||
|  | ||||
|   // PopFront removes and returns the first element in the queue. | ||||
|   // If the queue was empty returns default-constructed Work. | ||||
|   Work PopFront() { | ||||
|     unsigned front = front_.load(std::memory_order_relaxed); | ||||
|     Elem* e = &array_[(front - 1) & kMask]; | ||||
|     uint8_t s = e->state.load(std::memory_order_relaxed); | ||||
|     if (s != kReady || | ||||
|         !e->state.compare_exchange_strong(s, kBusy, std::memory_order_acquire)) | ||||
|       return Work(); | ||||
|     Work w = std::move(e->w); | ||||
|     e->state.store(kEmpty, std::memory_order_release); | ||||
|     front = ((front - 1) & kMask2) | (front & ~kMask2); | ||||
|     front_.store(front, std::memory_order_relaxed); | ||||
|     return w; | ||||
|   } | ||||
|  | ||||
|   // PushBack adds w at the end of the queue. | ||||
|   // If queue is full returns w, otherwise returns default-constructed Work. | ||||
|   Work PushBack(Work w) { | ||||
|     std::unique_lock<std::mutex> lock(mutex_); | ||||
|     unsigned back = back_.load(std::memory_order_relaxed); | ||||
|     Elem* e = &array_[(back - 1) & kMask]; | ||||
|     uint8_t s = e->state.load(std::memory_order_relaxed); | ||||
|     if (s != kEmpty || | ||||
|         !e->state.compare_exchange_strong(s, kBusy, std::memory_order_acquire)) | ||||
|       return w; | ||||
|     back = ((back - 1) & kMask2) | (back & ~kMask2); | ||||
|     back_.store(back, std::memory_order_relaxed); | ||||
|     e->w = std::move(w); | ||||
|     e->state.store(kReady, std::memory_order_release); | ||||
|     return Work(); | ||||
|   } | ||||
|  | ||||
|   // PopBack removes and returns the last element in the queue. | ||||
|   // Can fail spuriously. | ||||
|   Work PopBack() { | ||||
|     if (Empty()) return Work(); | ||||
|     std::unique_lock<std::mutex> lock(mutex_, std::try_to_lock); | ||||
|     if (!lock) return Work(); | ||||
|     unsigned back = back_.load(std::memory_order_relaxed); | ||||
|     Elem* e = &array_[back & kMask]; | ||||
|     uint8_t s = e->state.load(std::memory_order_relaxed); | ||||
|     if (s != kReady || | ||||
|         !e->state.compare_exchange_strong(s, kBusy, std::memory_order_acquire)) | ||||
|       return Work(); | ||||
|     Work w = std::move(e->w); | ||||
|     e->state.store(kEmpty, std::memory_order_release); | ||||
|     back_.store(back + 1 + (kSize << 1), std::memory_order_relaxed); | ||||
|     return w; | ||||
|   } | ||||
|  | ||||
|   // PopBackHalf removes and returns roughly half of the elements from the back of the queue. | ||||
|   // Returns the number of elements removed. Can also fail spuriously. | ||||
|   unsigned PopBackHalf(std::vector<Work>* result) { | ||||
|     if (Empty()) return 0; | ||||
|     std::unique_lock<std::mutex> lock(mutex_, std::try_to_lock); | ||||
|     if (!lock) return 0; | ||||
|     unsigned back = back_.load(std::memory_order_relaxed); | ||||
|     unsigned size = Size(); | ||||
|     unsigned mid = back; | ||||
|     if (size > 1) mid = back + (size - 1) / 2; | ||||
|     unsigned n = 0; | ||||
|     unsigned start = 0; | ||||
|     for (; static_cast<int>(mid - back) >= 0; mid--) { | ||||
|       Elem* e = &array_[mid & kMask]; | ||||
|       uint8_t s = e->state.load(std::memory_order_relaxed); | ||||
|       if (n == 0) { | ||||
|         if (s != kReady || | ||||
|             !e->state.compare_exchange_strong(s, kBusy, | ||||
|                                               std::memory_order_acquire)) | ||||
|           continue; | ||||
|         start = mid; | ||||
|       } else { | ||||
|         // Note: no need to store temporal kBusy, we exclusively own these | ||||
|         // elements. | ||||
|         eigen_assert(s == kReady); | ||||
|       } | ||||
|       result->push_back(std::move(e->w)); | ||||
|       e->state.store(kEmpty, std::memory_order_release); | ||||
|       n++; | ||||
|     } | ||||
|     if (n != 0) | ||||
|       back_.store(start + 1 + (kSize << 1), std::memory_order_relaxed); | ||||
|     return n; | ||||
|   } | ||||
|  | ||||
|   // Size returns current queue size. | ||||
|   // Can be called by any thread at any time. | ||||
|   unsigned Size() const { | ||||
|     // Emptiness plays critical role in thread pool blocking. So we go to great | ||||
|     // effort to not produce false positives (claim non-empty queue as empty). | ||||
|     for (;;) { | ||||
|       // Capture a consistent snapshot of front/back. | ||||
|       unsigned front = front_.load(std::memory_order_acquire); | ||||
|       unsigned back = back_.load(std::memory_order_acquire); | ||||
|       unsigned front1 = front_.load(std::memory_order_relaxed); | ||||
|       if (front != front1) continue; | ||||
|       int size = (front & kMask2) - (back & kMask2); | ||||
|       // Fix overflow. | ||||
|       if (size < 0) size += 2 * kSize; | ||||
|       // Order of modification in push/pop is crafted to make the queue look | ||||
|       // larger than it is during concurrent modifications. E.g. pop can | ||||
|       // decrement size before the corresponding push has incremented it. | ||||
|       // So the computed size can be up to kSize + 1, fix it. | ||||
|       if (size > static_cast<int>(kSize)) size = kSize; | ||||
|       return size; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   // Empty tests whether container is empty. | ||||
|   // Can be called by any thread at any time. | ||||
|   bool Empty() const { return Size() == 0; } | ||||
|  | ||||
|  private: | ||||
|   static const unsigned kMask = kSize - 1; | ||||
|   static const unsigned kMask2 = (kSize << 1) - 1; | ||||
|   struct Elem { | ||||
|     std::atomic<uint8_t> state; | ||||
|     Work w; | ||||
|   }; | ||||
|   enum { | ||||
|     kEmpty, | ||||
|     kBusy, | ||||
|     kReady, | ||||
|   }; | ||||
|   std::mutex mutex_; | ||||
|   // The low log(kSize) + 1 bits in front_ and back_ contain the rolling index of | ||||
|   // the front/back, respectively. The remaining bits contain modification counters | ||||
|   // that are incremented on Push operations. This allows us to (1) distinguish | ||||
|   // between empty and full conditions (if we would use log(kSize) bits for | ||||
|   // position, these conditions would be indistinguishable); (2) obtain | ||||
|   // consistent snapshot of front_/back_ for Size operation using the | ||||
|   // modification counters. | ||||
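|   // For the kSize = 1024 instantiation used by the thread pool, the low 11 bits | ||||
|   // hold the position (modulo 2 * kSize) and the remaining higher bits act as | ||||
|   // the modification counter. | ||||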
|   std::atomic<unsigned> front_; | ||||
|   std::atomic<unsigned> back_; | ||||
|   Elem array_[kSize]; | ||||
|  | ||||
|   RunQueue(const RunQueue&) = delete; | ||||
|   void operator=(const RunQueue&) = delete; | ||||
| }; | ||||
|  | ||||
| }  // namespace Eigen | ||||
|  | ||||
| #endif  // EIGEN_CXX11_THREADPOOL_RUNQUEUE_H_ | ||||
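A hedged, single-threaded sketch (not part of the vendored file) of the ownership contract described above: the owner thread uses PushFront/PopFront, while any other thread uses PushBack/PopBack. Work is std::function<void()>, which is what the queue is designed to store; the task bodies are placeholders.

#include <functional>
#include <unsupported/Eigen/CXX11/ThreadPool>

typedef std::function<void()> Work;

void run_queue_sketch() {
  Eigen::RunQueue<Work, 64> q;            // kSize must be a power of two

  // Owner side (a single designated thread):
  Work overflow = q.PushFront([] { /* task body */ });  // non-empty only if the queue was full
  Work w = q.PopFront();                                // empty Work if the queue was empty
  if (w) w();

  // Remote side (any thread; serialized internally by a mutex):
  q.PushBack([] { /* another task */ });
  Work stolen = q.PopBack();              // may fail spuriously under lock contention
  if (stolen) stolen();
  (void)overflow;
}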
							
								
								
									
154  external/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/SimpleThreadPool.h  vendored  Normal file
									
								
							| @@ -0,0 +1,154 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_THREADPOOL_SIMPLE_THREAD_POOL_H | ||||
| #define EIGEN_CXX11_THREADPOOL_SIMPLE_THREAD_POOL_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| // The implementation of the ThreadPool type ensures that the Schedule method | ||||
| // runs the functions it is provided in FIFO order when the scheduling is done | ||||
| // by a single thread. | ||||
| // Environment provides a way to create threads and also allows intercepting | ||||
| // task submission and execution. | ||||
| template <typename Environment> | ||||
| class SimpleThreadPoolTempl : public ThreadPoolInterface { | ||||
|  public: | ||||
|   // Construct a pool that contains "num_threads" threads. | ||||
|   explicit SimpleThreadPoolTempl(int num_threads, Environment env = Environment()) | ||||
|       : env_(env), threads_(num_threads), waiters_(num_threads) { | ||||
|     for (int i = 0; i < num_threads; i++) { | ||||
|       threads_.push_back(env.CreateThread([this, i]() { WorkerLoop(i); })); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   // Wait until all scheduled work has finished and then destroy the | ||||
|   // set of threads. | ||||
|   ~SimpleThreadPoolTempl() { | ||||
|     { | ||||
|       // Wait for all work to get done. | ||||
|       std::unique_lock<std::mutex> l(mu_); | ||||
|       while (!pending_.empty()) { | ||||
|         empty_.wait(l); | ||||
|       } | ||||
|       exiting_ = true; | ||||
|  | ||||
|       // Wakeup all waiters. | ||||
|       for (auto w : waiters_) { | ||||
|         w->ready = true; | ||||
|         w->task.f = nullptr; | ||||
|         w->cv.notify_one(); | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     // Wait for threads to finish. | ||||
|     for (auto t : threads_) { | ||||
|       delete t; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   // Schedule fn() for execution in the pool of threads. The functions are | ||||
|   // executed in the order in which they are scheduled. | ||||
|   void Schedule(std::function<void()> fn) final { | ||||
|     Task t = env_.CreateTask(std::move(fn)); | ||||
|     std::unique_lock<std::mutex> l(mu_); | ||||
|     if (waiters_.empty()) { | ||||
|       pending_.push_back(std::move(t)); | ||||
|     } else { | ||||
|       Waiter* w = waiters_.back(); | ||||
|       waiters_.pop_back(); | ||||
|       w->ready = true; | ||||
|       w->task = std::move(t); | ||||
|       w->cv.notify_one(); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   int NumThreads() const final { | ||||
|     return static_cast<int>(threads_.size()); | ||||
|   } | ||||
|  | ||||
|   int CurrentThreadId() const final { | ||||
|     const PerThread* pt = this->GetPerThread(); | ||||
|     if (pt->pool == this) { | ||||
|       return pt->thread_id; | ||||
|     } else { | ||||
|       return -1; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|  protected: | ||||
|   void WorkerLoop(int thread_id) { | ||||
|     std::unique_lock<std::mutex> l(mu_); | ||||
|     PerThread* pt = GetPerThread(); | ||||
|     pt->pool = this; | ||||
|     pt->thread_id = thread_id; | ||||
|     Waiter w; | ||||
|     Task t; | ||||
|     while (!exiting_) { | ||||
|       if (pending_.empty()) { | ||||
|         // Wait for work to be assigned to me | ||||
|         w.ready = false; | ||||
|         waiters_.push_back(&w); | ||||
|         while (!w.ready) { | ||||
|           w.cv.wait(l); | ||||
|         } | ||||
|         t = w.task; | ||||
|         w.task.f = nullptr; | ||||
|       } else { | ||||
|         // Pick up pending work | ||||
|         t = std::move(pending_.front()); | ||||
|         pending_.pop_front(); | ||||
|         if (pending_.empty()) { | ||||
|           empty_.notify_all(); | ||||
|         } | ||||
|       } | ||||
|       if (t.f) { | ||||
|         mu_.unlock(); | ||||
|         env_.ExecuteTask(t); | ||||
|         t.f = nullptr; | ||||
|         mu_.lock(); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|  private: | ||||
|   typedef typename Environment::Task Task; | ||||
|   typedef typename Environment::EnvThread Thread; | ||||
|  | ||||
|   struct Waiter { | ||||
|     std::condition_variable cv; | ||||
|     Task task; | ||||
|     bool ready; | ||||
|   }; | ||||
|  | ||||
|   struct PerThread { | ||||
|     constexpr PerThread() : pool(NULL), thread_id(-1) { } | ||||
|     SimpleThreadPoolTempl* pool;  // Parent pool, or null for normal threads. | ||||
|     int thread_id;                // Worker thread index in pool. | ||||
|   }; | ||||
|  | ||||
|   Environment env_; | ||||
|   std::mutex mu_; | ||||
|   MaxSizeVector<Thread*> threads_;  // All threads | ||||
|   MaxSizeVector<Waiter*> waiters_;  // Stack of waiting threads. | ||||
|   std::deque<Task> pending_;        // Queue of pending work | ||||
|   std::condition_variable empty_;   // Signaled on pending_.empty() | ||||
|   bool exiting_ = false; | ||||
|  | ||||
|   PerThread* GetPerThread() const { | ||||
|     EIGEN_THREAD_LOCAL PerThread per_thread; | ||||
|     return &per_thread; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| typedef SimpleThreadPoolTempl<StlThreadEnvironment> SimpleThreadPool; | ||||
|  | ||||
| }  // namespace Eigen | ||||
|  | ||||
| #endif  // EIGEN_CXX11_THREADPOOL_SIMPLE_THREAD_POOL_H | ||||
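A short, hedged sketch (not part of the vendored file) of the FIFO property noted in the comment above: with a single scheduling thread and a single worker, tasks run in submission order, and the destructor drains pending work before joining.

#include <iostream>
#include <unsupported/Eigen/CXX11/ThreadPool>

int main() {
  Eigen::SimpleThreadPool pool(1);        // one worker => FIFO execution
  for (int i = 0; i < 5; ++i) {
    pool.Schedule([i] { std::cout << "task " << i << "\n"; });
  }
  return 0;  // ~SimpleThreadPool waits for the pending queue to empty
}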
							
								
								
									
38  external/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h  vendored  Normal file
									
								
							| @@ -0,0 +1,38 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_THREADPOOL_THREAD_ENVIRONMENT_H | ||||
| #define EIGEN_CXX11_THREADPOOL_THREAD_ENVIRONMENT_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| struct StlThreadEnvironment { | ||||
|   struct Task { | ||||
|     std::function<void()> f; | ||||
|   }; | ||||
|  | ||||
|   // EnvThread constructor must start the thread, | ||||
|   // destructor must join the thread. | ||||
|   class EnvThread { | ||||
|    public: | ||||
|     EnvThread(std::function<void()> f) : thr_(std::move(f)) {} | ||||
|     ~EnvThread() { thr_.join(); } | ||||
|  | ||||
|    private: | ||||
|     std::thread thr_; | ||||
|   }; | ||||
|  | ||||
|   EnvThread* CreateThread(std::function<void()> f) { return new EnvThread(std::move(f)); } | ||||
|   Task CreateTask(std::function<void()> f) { return Task{std::move(f)}; } | ||||
|   void ExecuteTask(const Task& t) { t.f(); } | ||||
| }; | ||||
|  | ||||
| }  // namespace Eigen | ||||
|  | ||||
| #endif  // EIGEN_CXX11_THREADPOOL_THREAD_ENVIRONMENT_H | ||||
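The environment is the customization point the pool templates use to create threads and to intercept task creation and execution. The sketch below (not part of the vendored file) mirrors StlThreadEnvironment but counts executed tasks; the type and counter names are illustrative.

#include <atomic>
#include <functional>
#include <thread>

struct CountingThreadEnvironment {
  struct Task {
    std::function<void()> f;
  };

  // EnvThread starts the thread in its constructor and joins it in its destructor,
  // matching the contract documented for StlThreadEnvironment::EnvThread.
  class EnvThread {
   public:
    EnvThread(std::function<void()> f) : thr_(std::move(f)) {}
    ~EnvThread() { thr_.join(); }
   private:
    std::thread thr_;
  };

  static std::atomic<long>& executed() {   // process-wide task counter
    static std::atomic<long> counter(0);
    return counter;
  }

  EnvThread* CreateThread(std::function<void()> f) { return new EnvThread(std::move(f)); }
  Task CreateTask(std::function<void()> f) { return Task{std::move(f)}; }
  void ExecuteTask(const Task& t) {
    executed().fetch_add(1);               // interception point
    t.f();
  }
};

// Usage: Eigen::SimpleThreadPoolTempl<CountingThreadEnvironment> pool(4);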
							
								
								
									
22  external/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h  vendored  Normal file
									
								
							| @@ -0,0 +1,22 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_THREADPOOL_THREAD_LOCAL_H | ||||
| #define EIGEN_CXX11_THREADPOOL_THREAD_LOCAL_H | ||||
|  | ||||
| // Try to come up with a portable implementation of thread local variables | ||||
| #if EIGEN_COMP_GNUC && EIGEN_GNUC_AT_MOST(4, 7) | ||||
| #define EIGEN_THREAD_LOCAL static __thread | ||||
| #elif EIGEN_COMP_CLANG | ||||
| #define EIGEN_THREAD_LOCAL static __thread | ||||
| #else | ||||
| #define EIGEN_THREAD_LOCAL static thread_local | ||||
| #endif | ||||
|  | ||||
| #endif  // EIGEN_CXX11_THREADPOOL_THREAD_LOCAL_H | ||||
							
								
								
									
33  external/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h  vendored  Normal file
									
								
							| @@ -0,0 +1,33 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_THREADPOOL_THREAD_POOL_INTERFACE_H | ||||
| #define EIGEN_CXX11_THREADPOOL_THREAD_POOL_INTERFACE_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| // This defines an interface that ThreadPoolDevice can take to use | ||||
| // custom thread pools underneath. | ||||
| class ThreadPoolInterface { | ||||
|  public: | ||||
|   virtual void Schedule(std::function<void()> fn) = 0; | ||||
|  | ||||
|   // Returns the number of threads in the pool. | ||||
|   virtual int NumThreads() const = 0; | ||||
|  | ||||
|   // Returns a logical thread index between 0 and NumThreads() - 1 if called | ||||
|   // from one of the threads in the pool. Returns -1 otherwise. | ||||
|   virtual int CurrentThreadId() const = 0; | ||||
|  | ||||
|   virtual ~ThreadPoolInterface() {} | ||||
| }; | ||||
|  | ||||
| }  // namespace Eigen | ||||
|  | ||||
| #endif  // EIGEN_CXX11_THREADPOOL_THREAD_POOL_INTERFACE_H | ||||
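A minimal, hedged sketch (not part of the vendored file) of a custom pool behind this interface: it executes every task inline on the calling thread, which can be useful when debugging code written against ThreadPoolInterface. Reporting thread id 0 is a simplification of the CurrentThreadId() contract.

#include <functional>
#include <unsupported/Eigen/CXX11/ThreadPool>

class InlineThreadPool : public Eigen::ThreadPoolInterface {
 public:
  void Schedule(std::function<void()> fn) override { fn(); }  // run immediately
  int NumThreads() const override { return 1; }
  int CurrentThreadId() const override { return 0; }
};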
							
								
								
									
20  external/include/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h  vendored  Normal file
									
								
							| @@ -0,0 +1,20 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11_THREADPOOL_THREAD_YIELD_H | ||||
| #define EIGEN_CXX11_THREADPOOL_THREAD_YIELD_H | ||||
|  | ||||
| // Try to come up with a portable way to yield | ||||
| #if EIGEN_COMP_GNUC && EIGEN_GNUC_AT_MOST(4, 7) | ||||
| #define EIGEN_THREAD_YIELD() sched_yield() | ||||
| #else | ||||
| #define EIGEN_THREAD_YIELD() std::this_thread::yield() | ||||
| #endif | ||||
|  | ||||
| #endif  // EIGEN_CXX11_THREADPOOL_THREAD_YIELD_H | ||||
							
								
								
									
542  external/include/eigen3/unsupported/Eigen/CXX11/src/util/CXX11Meta.h  vendored  Normal file
									
								
							| @@ -0,0 +1,542 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2013 Christian Seiler <christian@iwakd.de> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11META_H | ||||
| #define EIGEN_CXX11META_H | ||||
|  | ||||
| #include <vector> | ||||
| #include "EmulateArray.h" | ||||
|  | ||||
| // Emulate the cxx11 functionality that we need if the compiler doesn't support it. | ||||
| // Visual Studio 2015 doesn't advertise itself as cxx11 compliant, although it | ||||
| // supports enough of the standard for our needs. | ||||
| #if __cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900 | ||||
|  | ||||
| #include "CXX11Workarounds.h" | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| namespace internal { | ||||
|  | ||||
| /** \internal | ||||
|   * \file CXX11/util/CXX11Meta.h | ||||
|   * This file contains generic metaprogramming classes which are not specifically related to Eigen. | ||||
|   * This file expands upon Core/util/Meta.h and adds support for C++11 specific features. | ||||
|   */ | ||||
|  | ||||
| template<typename... tt> | ||||
| struct type_list { constexpr static int count = sizeof...(tt); }; | ||||
|  | ||||
| template<typename t, typename... tt> | ||||
| struct type_list<t, tt...> { constexpr static int count = sizeof...(tt) + 1; typedef t first_type; }; | ||||
|  | ||||
| template<typename T, T... nn> | ||||
| struct numeric_list { constexpr static std::size_t count = sizeof...(nn); }; | ||||
|  | ||||
| template<typename T, T n, T... nn> | ||||
| struct numeric_list<T, n, nn...> { constexpr static std::size_t count = sizeof...(nn) + 1; constexpr static T first_value = n; }; | ||||
|  | ||||
| /* numeric list constructors | ||||
|  * | ||||
|  * equivalencies: | ||||
|  *     constructor                                              result | ||||
|  *     typename gen_numeric_list<int, 5>::type                  numeric_list<int, 0,1,2,3,4> | ||||
|  *     typename gen_numeric_list_reversed<int, 5>::type         numeric_list<int, 4,3,2,1,0> | ||||
|  *     typename gen_numeric_list_swapped_pair<int, 5,1,2>::type numeric_list<int, 0,2,1,3,4> | ||||
|  *     typename gen_numeric_list_repeated<int, 0, 5>::type      numeric_list<int, 0,0,0,0,0> | ||||
|  */ | ||||
|  | ||||
| template<typename T, std::size_t n, T start = 0, T... ii> struct gen_numeric_list                     : gen_numeric_list<T, n-1, start, start + n-1, ii...> {}; | ||||
| template<typename T, T start, T... ii>                    struct gen_numeric_list<T, 0, start, ii...> { typedef numeric_list<T, ii...> type; }; | ||||
|  | ||||
| template<typename T, std::size_t n, T start = 0, T... ii> struct gen_numeric_list_reversed                     : gen_numeric_list_reversed<T, n-1, start, ii..., start + n-1> {}; | ||||
| template<typename T, T start, T... ii>                    struct gen_numeric_list_reversed<T, 0, start, ii...> { typedef numeric_list<T, ii...> type; }; | ||||
|  | ||||
| template<typename T, std::size_t n, T a, T b, T start = 0, T... ii> struct gen_numeric_list_swapped_pair                           : gen_numeric_list_swapped_pair<T, n-1, a, b, start, (start + n-1) == a ? b : ((start + n-1) == b ? a : (start + n-1)), ii...> {}; | ||||
| template<typename T, T a, T b, T start, T... ii>                    struct gen_numeric_list_swapped_pair<T, 0, a, b, start, ii...> { typedef numeric_list<T, ii...> type; }; | ||||
|  | ||||
| template<typename T, std::size_t n, T V, T... nn> struct gen_numeric_list_repeated                 : gen_numeric_list_repeated<T, n-1, V, V, nn...> {}; | ||||
| template<typename T, T V, T... nn>                struct gen_numeric_list_repeated<T, 0, V, nn...> { typedef numeric_list<T, nn...> type; }; | ||||
|  | ||||
| /* list manipulation: concatenate */ | ||||
|  | ||||
| template<class a, class b> struct concat; | ||||
|  | ||||
| template<typename... as, typename... bs> struct concat<type_list<as...>,       type_list<bs...>>        { typedef type_list<as..., bs...> type; }; | ||||
| template<typename T, T... as, T... bs>   struct concat<numeric_list<T, as...>, numeric_list<T, bs...> > { typedef numeric_list<T, as..., bs...> type; }; | ||||
|  | ||||
| template<typename... p> struct mconcat; | ||||
| template<typename a>                             struct mconcat<a>           { typedef a type; }; | ||||
| template<typename a, typename b>                 struct mconcat<a, b>        : concat<a, b> {}; | ||||
| template<typename a, typename b, typename... cs> struct mconcat<a, b, cs...> : concat<a, typename mconcat<b, cs...>::type> {}; | ||||
|  | ||||
| /* list manipulation: extract slices */ | ||||
|  | ||||
| template<int n, typename x> struct take; | ||||
| template<int n, typename a, typename... as> struct take<n, type_list<a, as...>> : concat<type_list<a>, typename take<n-1, type_list<as...>>::type> {}; | ||||
| template<int n>                             struct take<n, type_list<>>         { typedef type_list<> type; }; | ||||
| template<typename a, typename... as>        struct take<0, type_list<a, as...>> { typedef type_list<> type; }; | ||||
| template<>                                  struct take<0, type_list<>>         { typedef type_list<> type; }; | ||||
|  | ||||
| template<typename T, int n, T a, T... as> struct take<n, numeric_list<T, a, as...>> : concat<numeric_list<T, a>, typename take<n-1, numeric_list<T, as...>>::type> {}; | ||||
| template<typename T, int n>               struct take<n, numeric_list<T>>           { typedef numeric_list<T> type; }; | ||||
| template<typename T, T a, T... as>        struct take<0, numeric_list<T, a, as...>> { typedef numeric_list<T> type; }; | ||||
| template<typename T>                      struct take<0, numeric_list<T>>           { typedef numeric_list<T> type; }; | ||||
|  | ||||
| template<typename T, int n, T... ii>      struct h_skip_helper_numeric; | ||||
| template<typename T, int n, T i, T... ii> struct h_skip_helper_numeric<T, n, i, ii...> : h_skip_helper_numeric<T, n-1, ii...> {}; | ||||
| template<typename T, T i, T... ii>        struct h_skip_helper_numeric<T, 0, i, ii...> { typedef numeric_list<T, i, ii...> type; }; | ||||
| template<typename T, int n>               struct h_skip_helper_numeric<T, n>           { typedef numeric_list<T> type; }; | ||||
| template<typename T>                      struct h_skip_helper_numeric<T, 0>           { typedef numeric_list<T> type; }; | ||||
|  | ||||
| template<int n, typename... tt>             struct h_skip_helper_type; | ||||
| template<int n, typename t, typename... tt> struct h_skip_helper_type<n, t, tt...> : h_skip_helper_type<n-1, tt...> {}; | ||||
| template<typename t, typename... tt>        struct h_skip_helper_type<0, t, tt...> { typedef type_list<t, tt...> type; }; | ||||
| template<int n>                             struct h_skip_helper_type<n>           { typedef type_list<> type; }; | ||||
| template<>                                  struct h_skip_helper_type<0>           { typedef type_list<> type; }; | ||||
|  | ||||
| template<int n> | ||||
| struct h_skip { | ||||
|   template<typename T, T... ii> | ||||
|   constexpr static inline typename h_skip_helper_numeric<T, n, ii...>::type helper(numeric_list<T, ii...>) { return typename h_skip_helper_numeric<T, n, ii...>::type(); } | ||||
|   template<typename... tt> | ||||
|   constexpr static inline typename h_skip_helper_type<n, tt...>::type helper(type_list<tt...>) { return typename h_skip_helper_type<n, tt...>::type(); } | ||||
| }; | ||||
|  | ||||
| template<int n, typename a> struct skip { typedef decltype(h_skip<n>::helper(a())) type; }; | ||||
|  | ||||
| template<int start, int count, typename a> struct slice : take<count, typename skip<start, a>::type> {}; | ||||
|  | ||||
| /* list manipulation: retrieve single element from list */ | ||||
|  | ||||
| template<int n, typename x> struct get; | ||||
|  | ||||
| template<int n, typename a, typename... as>               struct get<n, type_list<a, as...>>   : get<n-1, type_list<as...>> {}; | ||||
| template<typename a, typename... as>                      struct get<0, type_list<a, as...>>   { typedef a type; }; | ||||
|  | ||||
| template<typename T, int n, T a, T... as>                        struct get<n, numeric_list<T, a, as...>>   : get<n-1, numeric_list<T, as...>> {}; | ||||
| template<typename T, T a, T... as>                               struct get<0, numeric_list<T, a, as...>>   { constexpr static T value = a; }; | ||||
|  | ||||
| /* always get type, regardless of dummy; good for parameter pack expansion */ | ||||
|  | ||||
| template<typename T, T dummy, typename t> struct id_numeric  { typedef t type; }; | ||||
| template<typename dummy, typename t>      struct id_type     { typedef t type; }; | ||||
|  | ||||
| /* equality checking, flagged version */ | ||||
|  | ||||
| template<typename a, typename b> struct is_same_gf : is_same<a, b> { constexpr static int global_flags = 0; }; | ||||
|  | ||||
| /* apply_op to list */ | ||||
|  | ||||
| template< | ||||
|   bool from_left, // false | ||||
|   template<typename, typename> class op, | ||||
|   typename additional_param, | ||||
|   typename... values | ||||
| > | ||||
| struct h_apply_op_helper                                        { typedef type_list<typename op<values, additional_param>::type...> type; }; | ||||
| template< | ||||
|   template<typename, typename> class op, | ||||
|   typename additional_param, | ||||
|   typename... values | ||||
| > | ||||
| struct h_apply_op_helper<true, op, additional_param, values...> { typedef type_list<typename op<additional_param, values>::type...> type; }; | ||||
|  | ||||
| template< | ||||
|   bool from_left, | ||||
|   template<typename, typename> class op, | ||||
|   typename additional_param | ||||
| > | ||||
| struct h_apply_op | ||||
| { | ||||
|   template<typename... values> | ||||
|   constexpr static typename h_apply_op_helper<from_left, op, additional_param, values...>::type helper(type_list<values...>) | ||||
|   { return typename h_apply_op_helper<from_left, op, additional_param, values...>::type(); } | ||||
| }; | ||||
|  | ||||
| template< | ||||
|   template<typename, typename> class op, | ||||
|   typename additional_param, | ||||
|   typename a | ||||
| > | ||||
| struct apply_op_from_left { typedef decltype(h_apply_op<true, op, additional_param>::helper(a())) type; }; | ||||
|  | ||||
| template< | ||||
|   template<typename, typename> class op, | ||||
|   typename additional_param, | ||||
|   typename a | ||||
| > | ||||
| struct apply_op_from_right { typedef decltype(h_apply_op<false, op, additional_param>::helper(a())) type; }; | ||||
|  | ||||
| /* see if an element is in a list */ | ||||
|  | ||||
| template< | ||||
|   template<typename, typename> class test, | ||||
|   typename check_against, | ||||
|   typename h_list, | ||||
|   bool last_check_positive = false | ||||
| > | ||||
| struct contained_in_list; | ||||
|  | ||||
| template< | ||||
|   template<typename, typename> class test, | ||||
|   typename check_against, | ||||
|   typename h_list | ||||
| > | ||||
| struct contained_in_list<test, check_against, h_list, true> | ||||
| { | ||||
|   constexpr static bool value = true; | ||||
| }; | ||||
|  | ||||
| template< | ||||
|   template<typename, typename> class test, | ||||
|   typename check_against, | ||||
|   typename a, | ||||
|   typename... as | ||||
| > | ||||
| struct contained_in_list<test, check_against, type_list<a, as...>, false> : contained_in_list<test, check_against, type_list<as...>, test<check_against, a>::value> {}; | ||||
|  | ||||
| template< | ||||
|   template<typename, typename> class test, | ||||
|   typename check_against | ||||
|   EIGEN_TPL_PP_SPEC_HACK_DEFC(typename, empty) | ||||
| > | ||||
| struct contained_in_list<test, check_against, type_list<EIGEN_TPL_PP_SPEC_HACK_USE(empty)>, false> { constexpr static bool value = false; }; | ||||
|  | ||||
| /* see if an element is in a list and check for global flags */ | ||||
|  | ||||
| template< | ||||
|   template<typename, typename> class test, | ||||
|   typename check_against, | ||||
|   typename h_list, | ||||
|   int default_flags = 0, | ||||
|   bool last_check_positive = false, | ||||
|   int last_check_flags = default_flags | ||||
| > | ||||
| struct contained_in_list_gf; | ||||
|  | ||||
| template< | ||||
|   template<typename, typename> class test, | ||||
|   typename check_against, | ||||
|   typename h_list, | ||||
|   int default_flags, | ||||
|   int last_check_flags | ||||
| > | ||||
| struct contained_in_list_gf<test, check_against, h_list, default_flags, true, last_check_flags> | ||||
| { | ||||
|   constexpr static bool value = true; | ||||
|   constexpr static int global_flags = last_check_flags; | ||||
| }; | ||||
|  | ||||
| template< | ||||
|   template<typename, typename> class test, | ||||
|   typename check_against, | ||||
|   typename a, | ||||
|   typename... as, | ||||
|   int default_flags, | ||||
|   int last_check_flags | ||||
| > | ||||
| struct contained_in_list_gf<test, check_against, type_list<a, as...>, default_flags, false, last_check_flags> : contained_in_list_gf<test, check_against, type_list<as...>, default_flags, test<check_against, a>::value, test<check_against, a>::global_flags> {}; | ||||
|  | ||||
| template< | ||||
|   template<typename, typename> class test, | ||||
|   typename check_against | ||||
|   EIGEN_TPL_PP_SPEC_HACK_DEFC(typename, empty), | ||||
|   int default_flags, | ||||
|   int last_check_flags | ||||
| > | ||||
| struct contained_in_list_gf<test, check_against, type_list<EIGEN_TPL_PP_SPEC_HACK_USE(empty)>, default_flags, false, last_check_flags> { constexpr static bool value = false; constexpr static int global_flags = default_flags; }; | ||||
|  | ||||
| /* generic reductions */ | ||||
|  | ||||
| template< | ||||
|   typename Reducer, | ||||
|   typename... Ts | ||||
| > struct reduce; | ||||
|  | ||||
| template< | ||||
|   typename Reducer | ||||
| > struct reduce<Reducer> | ||||
| { | ||||
|   constexpr static inline int run() { return Reducer::Identity; } | ||||
| }; | ||||
|  | ||||
| template< | ||||
|   typename Reducer, | ||||
|   typename A | ||||
| > struct reduce<Reducer, A> | ||||
| { | ||||
|   constexpr static inline A run(A a) { return a; } | ||||
| }; | ||||
|  | ||||
| template< | ||||
|   typename Reducer, | ||||
|   typename A, | ||||
|   typename... Ts | ||||
| > struct reduce<Reducer, A, Ts...> | ||||
| { | ||||
|   constexpr static inline auto run(A a, Ts... ts) -> decltype(Reducer::run(a, reduce<Reducer, Ts...>::run(ts...))) { | ||||
|     return Reducer::run(a, reduce<Reducer, Ts...>::run(ts...)); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| /* generic binary operations */ | ||||
|  | ||||
| struct sum_op           { | ||||
|   template<typename A, typename B> EIGEN_DEVICE_FUNC constexpr static inline auto run(A a, B b) -> decltype(a + b)   { return a + b;   } | ||||
|   static constexpr int Identity = 0; | ||||
| }; | ||||
| struct product_op       { | ||||
|   template<typename A, typename B> EIGEN_DEVICE_FUNC constexpr static inline auto run(A a, B b) -> decltype(a * b)   { return a * b;   } | ||||
|   static constexpr int Identity = 1; | ||||
| }; | ||||
|  | ||||
| struct logical_and_op   { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a && b)  { return a && b;  } }; | ||||
| struct logical_or_op    { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a || b)  { return a || b;  } }; | ||||
|  | ||||
| struct equal_op         { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a == b)  { return a == b;  } }; | ||||
| struct not_equal_op     { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a != b)  { return a != b;  } }; | ||||
| struct lesser_op        { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a < b)   { return a < b;   } }; | ||||
| struct lesser_equal_op  { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a <= b)  { return a <= b;  } }; | ||||
| struct greater_op       { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a > b)   { return a > b;   } }; | ||||
| struct greater_equal_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a >= b)  { return a >= b;  } }; | ||||
|  | ||||
| /* generic unary operations */ | ||||
|  | ||||
| struct not_op                { template<typename A> constexpr static inline auto run(A a) -> decltype(!a)      { return !a;      } }; | ||||
| struct negation_op           { template<typename A> constexpr static inline auto run(A a) -> decltype(-a)      { return -a;      } }; | ||||
| struct greater_equal_zero_op { template<typename A> constexpr static inline auto run(A a) -> decltype(a >= 0)  { return a >= 0;  } }; | ||||
|  | ||||
|  | ||||
| /* reductions for lists */ | ||||
|  | ||||
| // Using an auto -> return value spec makes ICC 13.0 and 13.1 crash here, so we have to hack it | ||||
| // together in front. (13.0 doesn't work with array_prod/array_reduce/... anyway, but 13.1 | ||||
| // does.) | ||||
| template<typename... Ts> | ||||
| constexpr inline decltype(reduce<product_op, Ts...>::run((*((Ts*)0))...)) arg_prod(Ts... ts) | ||||
| { | ||||
|   return reduce<product_op, Ts...>::run(ts...); | ||||
| } | ||||
|  | ||||
| template<typename... Ts> | ||||
| constexpr inline decltype(reduce<sum_op, Ts...>::run((*((Ts*)0))...)) arg_sum(Ts... ts) | ||||
| { | ||||
|   return reduce<sum_op, Ts...>::run(ts...); | ||||
| } | ||||
|  | ||||
| /* reverse arrays */ | ||||
|  | ||||
| template<typename Array, int... n> | ||||
| constexpr inline Array h_array_reverse(Array arr, numeric_list<int, n...>) | ||||
| { | ||||
|   return {{array_get<sizeof...(n) - n - 1>(arr)...}}; | ||||
| } | ||||
|  | ||||
| template<typename T, std::size_t N> | ||||
| constexpr inline array<T, N> array_reverse(array<T, N> arr) | ||||
| { | ||||
|   return h_array_reverse(arr, typename gen_numeric_list<int, N>::type()); | ||||
| } | ||||
|  | ||||
|  | ||||
| /* generic array reductions */ | ||||
|  | ||||
| // can't reuse standard reduce() interface above because Intel's Compiler | ||||
| // *really* doesn't like it, so we just reimplement the stuff | ||||
| // (start from N - 1 and work down to 0 because specialization for | ||||
| // n == N - 1 also doesn't work in Intel's compiler, so it goes into | ||||
| // an infinite loop) | ||||
| template<typename Reducer, typename T, std::size_t N, std::size_t n = N - 1> | ||||
| struct h_array_reduce { | ||||
|   EIGEN_DEVICE_FUNC constexpr static inline auto run(array<T, N> arr, T identity) -> decltype(Reducer::run(h_array_reduce<Reducer, T, N, n - 1>::run(arr, identity), array_get<n>(arr))) | ||||
|   { | ||||
|     return Reducer::run(h_array_reduce<Reducer, T, N, n - 1>::run(arr, identity), array_get<n>(arr)); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<typename Reducer, typename T, std::size_t N> | ||||
| struct h_array_reduce<Reducer, T, N, 0> | ||||
| { | ||||
|   EIGEN_DEVICE_FUNC constexpr static inline T run(const array<T, N>& arr, T) | ||||
|   { | ||||
|     return array_get<0>(arr); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<typename Reducer, typename T> | ||||
| struct h_array_reduce<Reducer, T, 0> | ||||
| { | ||||
|   EIGEN_DEVICE_FUNC constexpr static inline T run(const array<T, 0>&, T identity) | ||||
|   { | ||||
|     return identity; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<typename Reducer, typename T, std::size_t N> | ||||
| EIGEN_DEVICE_FUNC constexpr inline auto array_reduce(const array<T, N>& arr, T identity) -> decltype(h_array_reduce<Reducer, T, N>::run(arr, identity)) | ||||
| { | ||||
|   return h_array_reduce<Reducer, T, N>::run(arr, identity); | ||||
| } | ||||
|  | ||||
| /* standard array reductions */ | ||||
|  | ||||
| template<typename T, std::size_t N> | ||||
| EIGEN_DEVICE_FUNC constexpr inline auto array_sum(const array<T, N>& arr) -> decltype(array_reduce<sum_op, T, N>(arr, static_cast<T>(0))) | ||||
| { | ||||
|   return array_reduce<sum_op, T, N>(arr, static_cast<T>(0)); | ||||
| } | ||||
|  | ||||
| template<typename T, std::size_t N> | ||||
| EIGEN_DEVICE_FUNC constexpr inline auto array_prod(const array<T, N>& arr) -> decltype(array_reduce<product_op, T, N>(arr, static_cast<T>(1))) | ||||
| { | ||||
|   return array_reduce<product_op, T, N>(arr, static_cast<T>(1)); | ||||
| } | ||||
|  | ||||
| template<typename t> | ||||
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const std::vector<t>& a) { | ||||
|   eigen_assert(a.size() > 0); | ||||
|   t prod = 1; | ||||
|   for (size_t i = 0; i < a.size(); ++i) { prod *= a[i]; } | ||||
|   return prod; | ||||
| } | ||||
|  | ||||
| /* zip an array */ | ||||
|  | ||||
| template<typename Op, typename A, typename B, std::size_t N, int... n> | ||||
| constexpr inline array<decltype(Op::run(A(), B())),N> h_array_zip(array<A, N> a, array<B, N> b, numeric_list<int, n...>) | ||||
| { | ||||
|   return array<decltype(Op::run(A(), B())),N>{{ Op::run(array_get<n>(a), array_get<n>(b))... }}; | ||||
| } | ||||
|  | ||||
| template<typename Op, typename A, typename B, std::size_t N> | ||||
| constexpr inline array<decltype(Op::run(A(), B())),N> array_zip(array<A, N> a, array<B, N> b) | ||||
| { | ||||
|   return h_array_zip<Op>(a, b, typename gen_numeric_list<int, N>::type()); | ||||
| } | ||||
|  | ||||
| /* zip an array and reduce the result */ | ||||
|  | ||||
| template<typename Reducer, typename Op, typename A, typename B, std::size_t N, int... n> | ||||
| constexpr inline auto h_array_zip_and_reduce(array<A, N> a, array<B, N> b, numeric_list<int, n...>) -> decltype(reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A(), B()))>::type...>::run(Op::run(array_get<n>(a), array_get<n>(b))...)) | ||||
| { | ||||
|   return reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A(), B()))>::type...>::run(Op::run(array_get<n>(a), array_get<n>(b))...); | ||||
| } | ||||
|  | ||||
| template<typename Reducer, typename Op, typename A, typename B, std::size_t N> | ||||
| constexpr inline auto array_zip_and_reduce(array<A, N> a, array<B, N> b) -> decltype(h_array_zip_and_reduce<Reducer, Op, A, B, N>(a, b, typename gen_numeric_list<int, N>::type())) | ||||
| { | ||||
|   return h_array_zip_and_reduce<Reducer, Op, A, B, N>(a, b, typename gen_numeric_list<int, N>::type()); | ||||
| } | ||||
|  | ||||
| /* apply stuff to an array */ | ||||
|  | ||||
| template<typename Op, typename A, std::size_t N, int... n> | ||||
| constexpr inline array<decltype(Op::run(A())),N> h_array_apply(array<A, N> a, numeric_list<int, n...>) | ||||
| { | ||||
|   return array<decltype(Op::run(A())),N>{{ Op::run(array_get<n>(a))... }}; | ||||
| } | ||||
|  | ||||
| template<typename Op, typename A, std::size_t N> | ||||
| constexpr inline array<decltype(Op::run(A())),N> array_apply(array<A, N> a) | ||||
| { | ||||
|   return h_array_apply<Op>(a, typename gen_numeric_list<int, N>::type()); | ||||
| } | ||||
|  | ||||
| /* apply stuff to an array and reduce */ | ||||
|  | ||||
| template<typename Reducer, typename Op, typename A, std::size_t N, int... n> | ||||
| constexpr inline auto h_array_apply_and_reduce(array<A, N> arr, numeric_list<int, n...>) -> decltype(reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A()))>::type...>::run(Op::run(array_get<n>(arr))...)) | ||||
| { | ||||
|   return reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A()))>::type...>::run(Op::run(array_get<n>(arr))...); | ||||
| } | ||||
|  | ||||
| template<typename Reducer, typename Op, typename A, std::size_t N> | ||||
| constexpr inline auto array_apply_and_reduce(array<A, N> a) -> decltype(h_array_apply_and_reduce<Reducer, Op, A, N>(a, typename gen_numeric_list<int, N>::type())) | ||||
| { | ||||
|   return h_array_apply_and_reduce<Reducer, Op, A, N>(a, typename gen_numeric_list<int, N>::type()); | ||||
| } | ||||
|  | ||||
| /* repeat a value n times (and make an array out of it) | ||||
|  * usage: | ||||
|  *   array<int, 16> arr = repeat<16>(42); | ||||
|  */ | ||||
|  | ||||
| template<int n> | ||||
| struct h_repeat | ||||
| { | ||||
|   template<typename t, int... ii> | ||||
|   constexpr static inline array<t, n> run(t v, numeric_list<int, ii...>) | ||||
|   { | ||||
|     return {{ typename id_numeric<int, ii, t>::type(v)... }}; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<int n, typename t> | ||||
| constexpr array<t, n> repeat(t v) { return h_repeat<n>::run(v, typename gen_numeric_list<int, n>::type()); } | ||||
|  | ||||
| /* instantiate a class by a C-style array */ | ||||
| template<class InstType, typename ArrType, std::size_t N, bool Reverse, typename... Ps> | ||||
| struct h_instantiate_by_c_array; | ||||
|  | ||||
| template<class InstType, typename ArrType, std::size_t N, typename... Ps> | ||||
| struct h_instantiate_by_c_array<InstType, ArrType, N, false, Ps...> | ||||
| { | ||||
|   static InstType run(ArrType* arr, Ps... args) | ||||
|   { | ||||
|     return h_instantiate_by_c_array<InstType, ArrType, N - 1, false, Ps..., ArrType>::run(arr + 1, args..., arr[0]); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<class InstType, typename ArrType, std::size_t N, typename... Ps> | ||||
| struct h_instantiate_by_c_array<InstType, ArrType, N, true, Ps...> | ||||
| { | ||||
|   static InstType run(ArrType* arr, Ps... args) | ||||
|   { | ||||
|     return h_instantiate_by_c_array<InstType, ArrType, N - 1, false, ArrType, Ps...>::run(arr + 1, arr[0], args...); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<class InstType, typename ArrType, typename... Ps> | ||||
| struct h_instantiate_by_c_array<InstType, ArrType, 0, false, Ps...> | ||||
| { | ||||
|   static InstType run(ArrType* arr, Ps... args) | ||||
|   { | ||||
|     (void)arr; | ||||
|     return InstType(args...); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<class InstType, typename ArrType, typename... Ps> | ||||
| struct h_instantiate_by_c_array<InstType, ArrType, 0, true, Ps...> | ||||
| { | ||||
|   static InstType run(ArrType* arr, Ps... args) | ||||
|   { | ||||
|     (void)arr; | ||||
|     return InstType(args...); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<class InstType, typename ArrType, std::size_t N, bool Reverse = false> | ||||
| InstType instantiate_by_c_array(ArrType* arr) | ||||
| { | ||||
|   return h_instantiate_by_c_array<InstType, ArrType, N, Reverse>::run(arr); | ||||
| } | ||||
|  | ||||
| } // end namespace internal | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #else // Non C++11, fallback to emulation mode | ||||
|  | ||||
| #include "EmulateCXX11Meta.h" | ||||
|  | ||||
| #endif | ||||
|  | ||||
| #endif // EIGEN_CXX11META_H | ||||
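Because all of these helpers are resolved at compile time, the equivalence tables in the comments above can be checked directly with static_assert. The sketch below assumes a C++11 compiler, that the vendored eigen3 directory is on the include path, and that pulling in the unsupported Tensor umbrella header makes these internal helpers visible; it is a sanity check, not production code.

    // Compile-time checks for a few helpers from CXX11Meta.h.
    #include <type_traits>
    #include <unsupported/Eigen/CXX11/Tensor>   // assumed to pull in this header

    namespace ei = Eigen::internal;

    // numeric list constructors
    static_assert(std::is_same<ei::gen_numeric_list<int, 3>::type,
                               ei::numeric_list<int, 0, 1, 2> >::value, "gen");
    static_assert(std::is_same<ei::gen_numeric_list_reversed<int, 3>::type,
                               ei::numeric_list<int, 2, 1, 0> >::value, "reversed");

    // element access and slicing
    static_assert(ei::get<1, ei::numeric_list<int, 4, 5, 6> >::value == 5, "get");
    static_assert(std::is_same<ei::slice<1, 2, ei::numeric_list<int, 4, 5, 6, 7> >::type,
                               ei::numeric_list<int, 5, 6> >::value, "slice");

    // variadic reductions
    static_assert(ei::arg_prod(2, 3, 4) == 24, "arg_prod");
    static_assert(ei::arg_sum(1, 2, 3) == 6, "arg_sum");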
							
								
								
									
external/include/eigen3/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h  (88 lines, vendored, Normal file)
							| @@ -0,0 +1,88 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2013 Christian Seiler <christian@iwakd.de> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_CXX11WORKAROUNDS_H | ||||
| #define EIGEN_CXX11WORKAROUNDS_H | ||||
|  | ||||
| /* COMPATIBILITY CHECKS | ||||
|  * (so users of compilers that are too old get some realistic error messages) | ||||
|  */ | ||||
| #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER < 1310) | ||||
| #error Intel Compiler only supports required C++ features since version 13.1. | ||||
| // Note that most of this works in principle with 13.0, but when combining some | ||||
| // features, 13.0 will eventually fail with an internal assertion. | ||||
| #elif defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 6)) | ||||
| // G++ < 4.6 by default will continue processing the source files - even if we use #error to make | ||||
| // it error out. For this reason, we use the pragma to make sure G++ aborts at the first error | ||||
| // it sees. Unfortunately, that is still not our #error directive, but at least the output is | ||||
| // short enough the user has a chance to see that the compiler version is not sufficient for | ||||
| // the funky template mojo we use. | ||||
| #pragma GCC diagnostic error "-Wfatal-errors" | ||||
| #error GNU C++ Compiler (g++) only supports required C++ features since version 4.6. | ||||
| #endif | ||||
|  | ||||
| /* Check that the compiler at least claims to support C++11. It might not be sufficient | ||||
|  * because the compiler may not implement it correctly, but at least we'll know. | ||||
|  * On the other hand, Visual Studio still doesn't claim to support C++11 although it is | ||||
|  * compliant enough for our purposes. | ||||
|  */ | ||||
| #if (__cplusplus <= 199711L) && (EIGEN_COMP_MSVC < 1900) | ||||
| #if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) | ||||
| #pragma GCC diagnostic error "-Wfatal-errors" | ||||
| #endif | ||||
| #error This library needs at least a C++11 compliant compiler. If you use g++/clang, please enable the -std=c++11 compiler flag. (-std=c++0x on older versions.) | ||||
| #endif | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| namespace internal { | ||||
|  | ||||
| /* std::get is only constexpr in C++14, not yet in C++11 | ||||
|  */ | ||||
|  | ||||
|  | ||||
| template<std::size_t I, class T> constexpr inline T&       array_get(std::vector<T>&       a) { return a[I]; } | ||||
| template<std::size_t I, class T> constexpr inline T&&      array_get(std::vector<T>&&      a) { return a[I]; } | ||||
| template<std::size_t I, class T> constexpr inline T const& array_get(std::vector<T> const& a) { return a[I]; } | ||||
|  | ||||
| /* Suppose you have a template of the form | ||||
|  * template<typename T> struct X; | ||||
|  * And you want to specialize it in such a way: | ||||
|  *    template<typename S1, typename... SN> struct X<Foo<S1, SN...>> { ::: }; | ||||
|  *    template<>                            struct X<Foo<>>          { ::: }; | ||||
|  * This will work in Intel's compiler 13.0, but only to some extent in g++ 4.6, since | ||||
|  * g++ can only match templates called with parameter packs if the number of template | ||||
|  * arguments is not a fixed size (so inside the first specialization, referencing | ||||
|  * X<Foo<Sn...>> will fail in g++). On the other hand, g++ will accept the following: | ||||
|  *    template<typename... S> struct X<Foo<S...>> { ::: }; | ||||
|  * as an additional (!) specialization, which will then only match the empty case. | ||||
|  * But Intel's compiler 13.0 won't accept that, it will only accept the empty syntax, | ||||
|  * so we have to create a workaround for this. | ||||
|  */ | ||||
| #if defined(__GNUC__) && !defined(__INTEL_COMPILER) | ||||
| #define EIGEN_TPL_PP_SPEC_HACK_DEF(mt, n)    mt... n | ||||
| #define EIGEN_TPL_PP_SPEC_HACK_DEFC(mt, n)   , EIGEN_TPL_PP_SPEC_HACK_DEF(mt, n) | ||||
| #define EIGEN_TPL_PP_SPEC_HACK_USE(n)        n... | ||||
| #define EIGEN_TPL_PP_SPEC_HACK_USEC(n)       , n... | ||||
| #else | ||||
| #define EIGEN_TPL_PP_SPEC_HACK_DEF(mt, n) | ||||
| #define EIGEN_TPL_PP_SPEC_HACK_DEFC(mt, n) | ||||
| #define EIGEN_TPL_PP_SPEC_HACK_USE(n) | ||||
| #define EIGEN_TPL_PP_SPEC_HACK_USEC(n) | ||||
| #endif | ||||
|  | ||||
| } // end namespace internal | ||||
|  | ||||
| } // end namespace Eigen | ||||
|  | ||||
| #endif // EIGEN_CXX11WORKAROUNDS_H | ||||
|  | ||||
| /* | ||||
|  * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; | ||||
|  */ | ||||
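The EIGEN_TPL_PP_SPEC_HACK_* macros let the empty-parameter-pack specialization described in the comment above be written once and expand to whichever spelling the current compiler accepts. A sketch of that pattern, using made-up names X and Foo and assuming the macros are visible through the usual unsupported headers:

    // Illustrative use of the empty-pack specialization hack.
    #include <unsupported/Eigen/CXX11/Tensor>   // assumed to bring in CXX11Workarounds.h

    template<typename... Ts> struct Foo {};
    template<typename T>     struct X;

    // General case: Foo instantiated with at least one argument.
    template<typename S1, typename... SN>
    struct X<Foo<S1, SN...> > { static const bool is_empty = false; };

    // Empty case: expands to X<Foo<S...>> on g++ and to X<Foo<>> elsewhere.
    template<EIGEN_TPL_PP_SPEC_HACK_DEF(typename, S)>
    struct X<Foo<EIGEN_TPL_PP_SPEC_HACK_USE(S)> > { static const bool is_empty = true; };

    static_assert(X<Foo<> >::is_empty, "empty pack");
    static_assert(!X<Foo<int, float> >::is_empty, "non-empty pack");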
							
								
								
									
external/include/eigen3/unsupported/Eigen/CXX11/src/util/EmulateArray.h  (267 lines, vendored, Normal file)
							| @@ -0,0 +1,267 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_EMULATE_ARRAY_H | ||||
| #define EIGEN_EMULATE_ARRAY_H | ||||
|  | ||||
|  | ||||
|  | ||||
| // The array class is only available starting with cxx11. Emulate our own here | ||||
| // if needed. Beware, msvc still doesn't advertise itself as a c++11 compiler! | ||||
| // Moreover, CUDA doesn't support the STL containers, so we use our own instead. | ||||
| #if (__cplusplus <= 199711L && EIGEN_COMP_MSVC < 1900) || defined(__CUDACC__) || defined(EIGEN_AVOID_STL_ARRAY) | ||||
|  | ||||
| namespace Eigen { | ||||
| template <typename T, size_t n> class array { | ||||
|  public: | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE T& operator[] (size_t index) { return values[index]; } | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE const T& operator[] (size_t index) const { return values[index]; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE T& front() { return values[0]; } | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE const T& front() const { return values[0]; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE T& back() { return values[n-1]; } | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE const T& back() const { return values[n-1]; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE | ||||
|   static std::size_t size() { return n; } | ||||
|  | ||||
|   T values[n]; | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE array() { } | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE array(const T& v) { | ||||
|     EIGEN_STATIC_ASSERT(n==1, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     values[0] = v; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE array(const T& v1, const T& v2) { | ||||
|     EIGEN_STATIC_ASSERT(n==2, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     values[0] = v1; | ||||
|     values[1] = v2; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3) { | ||||
|     EIGEN_STATIC_ASSERT(n==3, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     values[0] = v1; | ||||
|     values[1] = v2; | ||||
|     values[2] = v3; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, | ||||
|                             const T& v4) { | ||||
|     EIGEN_STATIC_ASSERT(n==4, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     values[0] = v1; | ||||
|     values[1] = v2; | ||||
|     values[2] = v3; | ||||
|     values[3] = v4; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4, | ||||
|                             const T& v5) { | ||||
|     EIGEN_STATIC_ASSERT(n==5, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     values[0] = v1; | ||||
|     values[1] = v2; | ||||
|     values[2] = v3; | ||||
|     values[3] = v4; | ||||
|     values[4] = v5; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4, | ||||
|                             const T& v5, const T& v6) { | ||||
|     EIGEN_STATIC_ASSERT(n==6, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     values[0] = v1; | ||||
|     values[1] = v2; | ||||
|     values[2] = v3; | ||||
|     values[3] = v4; | ||||
|     values[4] = v5; | ||||
|     values[5] = v6; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4, | ||||
|                             const T& v5, const T& v6, const T& v7) { | ||||
|     EIGEN_STATIC_ASSERT(n==7, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     values[0] = v1; | ||||
|     values[1] = v2; | ||||
|     values[2] = v3; | ||||
|     values[3] = v4; | ||||
|     values[4] = v5; | ||||
|     values[5] = v6; | ||||
|     values[6] = v7; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE array( | ||||
|       const T& v1, const T& v2, const T& v3, const T& v4, | ||||
|       const T& v5, const T& v6, const T& v7, const T& v8) { | ||||
|     EIGEN_STATIC_ASSERT(n==8, YOU_MADE_A_PROGRAMMING_MISTAKE) | ||||
|     values[0] = v1; | ||||
|     values[1] = v2; | ||||
|     values[2] = v3; | ||||
|     values[3] = v4; | ||||
|     values[4] = v5; | ||||
|     values[5] = v6; | ||||
|     values[6] = v7; | ||||
|     values[7] = v8; | ||||
|   } | ||||
|  | ||||
| #if EIGEN_HAS_VARIADIC_TEMPLATES | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE array(std::initializer_list<T> l) { | ||||
|     eigen_assert(l.size() == n); | ||||
|     internal::smart_copy(l.begin(), l.end(), values); | ||||
|   } | ||||
| #endif | ||||
| }; | ||||
|  | ||||
|  | ||||
| // Specialize array for zero size | ||||
| template <typename T> class array<T, 0> { | ||||
|  public: | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE T& operator[] (size_t) { | ||||
|     eigen_assert(false && "Can't index a zero size array"); | ||||
|     return dummy; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE const T& operator[] (size_t) const { | ||||
|     eigen_assert(false && "Can't index a zero size array"); | ||||
|     return dummy; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE T& front() { | ||||
|     eigen_assert(false && "Can't index a zero size array"); | ||||
|     return dummy; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE const T& front() const { | ||||
|     eigen_assert(false && "Can't index a zero size array"); | ||||
|     return dummy; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE T& back() { | ||||
|     eigen_assert(false && "Can't index a zero size array"); | ||||
|     return dummy; | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE const T& back() const { | ||||
|     eigen_assert(false && "Can't index a zero size array"); | ||||
|     return dummy; | ||||
|   } | ||||
|  | ||||
|   static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::size_t size() { return 0; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   EIGEN_STRONG_INLINE array() : dummy() { } | ||||
|  | ||||
| #if EIGEN_HAS_VARIADIC_TEMPLATES | ||||
|   EIGEN_DEVICE_FUNC array(std::initializer_list<T> l) : dummy() { | ||||
|     eigen_assert(l.size() == 0); | ||||
|   } | ||||
| #endif | ||||
|  | ||||
|  private: | ||||
|   T dummy; | ||||
| }; | ||||
|  | ||||
| // Comparison operator | ||||
| // TODO: implement !=, <, <=, >, and >= | ||||
| template<class T, std::size_t N> | ||||
| EIGEN_DEVICE_FUNC bool operator==(const array<T,N>& lhs, const array<T,N>& rhs) { | ||||
|   for (std::size_t i = 0; i < N; ++i) { | ||||
|     if (lhs[i] != rhs[i]) { | ||||
|       return false; | ||||
|     } | ||||
|   } | ||||
|   return true; | ||||
| } | ||||
|  | ||||
|  | ||||
| namespace internal { | ||||
| template<std::size_t I, class T, std::size_t N> | ||||
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T& array_get(array<T,N>& a) { | ||||
|   return a[I]; | ||||
| } | ||||
| template<std::size_t I, class T, std::size_t N> | ||||
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& array_get(const array<T,N>& a) { | ||||
|   return a[I]; | ||||
| } | ||||
|  | ||||
| template <typename T> struct array_size; | ||||
| template<class T, std::size_t N> struct array_size<array<T,N> > { | ||||
|   static const size_t value = N; | ||||
| }; | ||||
| template <typename T> struct array_size; | ||||
| template<class T, std::size_t N> struct array_size<array<T,N>& > { | ||||
|   static const size_t value = N; | ||||
| }; | ||||
| template <typename T> struct array_size; | ||||
| template<class T, std::size_t N> struct array_size<const array<T,N> > { | ||||
|   static const size_t value = N; | ||||
| }; | ||||
| template <typename T> struct array_size; | ||||
| template<class T, std::size_t N> struct array_size<const array<T,N>& > { | ||||
|   static const size_t value = N; | ||||
| }; | ||||
|  | ||||
| }  // end namespace internal | ||||
| }  // end namespace Eigen | ||||
|  | ||||
| #else | ||||
|  | ||||
| // The compiler supports c++11, and we're not targeting cuda: use std::array as Eigen::array | ||||
| #include <array> | ||||
| namespace Eigen { | ||||
|  | ||||
| template <typename T, std::size_t N> using array = std::array<T, N>; | ||||
|  | ||||
| namespace internal { | ||||
| /* std::get is only constexpr in C++14, not yet in C++11 | ||||
|  *     - libstdc++ from version 4.7 onwards has it nevertheless, | ||||
|  *                                          so use that | ||||
|  *     - libstdc++ older versions: use _M_instance directly | ||||
|  *     - libc++ all versions so far: use __elems_ directly | ||||
|  *     - all other libs: use std::get to be portable, but | ||||
|  *                       this may not be constexpr | ||||
|  */ | ||||
| #if defined(__GLIBCXX__) && __GLIBCXX__ < 20120322 | ||||
| #define STD_GET_ARR_HACK             a._M_instance[I] | ||||
| #elif defined(_LIBCPP_VERSION) | ||||
| #define STD_GET_ARR_HACK             a.__elems_[I] | ||||
| #else | ||||
| #define STD_GET_ARR_HACK             std::template get<I, T, N>(a) | ||||
| #endif | ||||
|  | ||||
| template<std::size_t I, class T, std::size_t N> constexpr inline T&       array_get(std::array<T,N>&       a) { return (T&)       STD_GET_ARR_HACK; } | ||||
| template<std::size_t I, class T, std::size_t N> constexpr inline T&&      array_get(std::array<T,N>&&      a) { return (T&&)      STD_GET_ARR_HACK; } | ||||
| template<std::size_t I, class T, std::size_t N> constexpr inline T const& array_get(std::array<T,N> const& a) { return (T const&) STD_GET_ARR_HACK; } | ||||
|  | ||||
| #undef STD_GET_ARR_HACK | ||||
|  | ||||
| template <typename T> struct array_size; | ||||
| template<class T, std::size_t N> struct array_size<const std::array<T,N> > { | ||||
|   static const size_t value = N; | ||||
| }; | ||||
| template <typename T> struct array_size; | ||||
| template<class T, std::size_t N> struct array_size<std::array<T,N> > { | ||||
|   static const size_t value = N; | ||||
| }; | ||||
| }  // end namespace internal | ||||
| }  // end namespace Eigen | ||||
|  | ||||
| #endif | ||||
|  | ||||
| #endif  // EIGEN_EMULATE_ARRAY_H | ||||
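Whichever branch is selected, client code sees the same Eigen::array interface together with the internal array_get, array_prod and array_size helpers. A short usage sketch (the function name and the values are illustrative; it assumes the vendored headers are on the include path):

    // Eigen::array is either the emulated class above or an alias for std::array.
    #include <cstddef>
    #include <unsupported/Eigen/CXX11/Tensor>

    int array_demo() {
      Eigen::array<int, 3> dims;             // fixed-size array, same API in both branches
      dims[0] = 2; dims[1] = 3; dims[2] = 4;

      int first = Eigen::internal::array_get<0>(dims);   // compile-time indexed access
      int count = Eigen::internal::array_prod(dims);     // 2 * 3 * 4 = 24

      // array_size recovers the static length N of the array type.
      static const std::size_t rank =
          Eigen::internal::array_size<Eigen::array<int, 3> >::value;   // 3

      return first + count + static_cast<int>(rank);
    }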
							
								
								
									
external/include/eigen3/unsupported/Eigen/CXX11/src/util/EmulateCXX11Meta.h  (311 lines, vendored, Normal file)
							| @@ -0,0 +1,311 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_EMULATE_CXX11_META_H | ||||
| #define EIGEN_EMULATE_CXX11_META_H | ||||
|  | ||||
|  | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| namespace internal { | ||||
|  | ||||
| /** \internal | ||||
|   * \file CXX11/util/EmulateCXX11Meta.h | ||||
|   * This file emulates a subset of the functionality provided by CXX11Meta.h for | ||||
|   * compilers that don't yet support cxx11 such as nvcc. | ||||
|   */ | ||||
|  | ||||
| struct empty_list { static const std::size_t count = 0; }; | ||||
|  | ||||
| template<typename T, typename Tail=empty_list> struct type_list { | ||||
|   typedef T HeadType; | ||||
|   typedef Tail TailType; | ||||
|   static const T head; | ||||
|   static const Tail tail; | ||||
|   static const std::size_t count = 1 + Tail::count; | ||||
| }; | ||||
|  | ||||
| struct null_type { }; | ||||
|  | ||||
| template<typename T1 = null_type, typename T2 = null_type, typename T3 = null_type, | ||||
|          typename T4 = null_type, typename T5 = null_type, typename T6 = null_type, | ||||
|          typename T7 = null_type, typename T8 = null_type> | ||||
| struct make_type_list { | ||||
|   typedef typename make_type_list<T2, T3, T4, T5, T6, T7, T8>::type tailresult; | ||||
|  | ||||
|   typedef type_list<T1, tailresult> type; | ||||
| }; | ||||
|  | ||||
| template<> struct make_type_list<> { | ||||
|   typedef empty_list type; | ||||
| }; | ||||
|  | ||||
|  | ||||
| template <std::size_t index, class TList> struct get_type; | ||||
|  | ||||
| template <class Head, class Tail> | ||||
| struct get_type<0, type_list<Head, Tail> > | ||||
| { | ||||
|   typedef Head type; | ||||
| }; | ||||
|  | ||||
| template <std::size_t i, class Head, class Tail> | ||||
| struct get_type<i, type_list<Head, Tail> > | ||||
| { | ||||
|   typedef typename get_type<i-1, Tail>::type type; | ||||
| }; | ||||
|  | ||||
|  | ||||
| /* numeric list */ | ||||
| template <typename T, T n> | ||||
| struct type2val { | ||||
|   typedef T type; | ||||
|   static const T value = n; | ||||
| }; | ||||
|  | ||||
|  | ||||
| template<typename T, size_t n, T V> struct gen_numeric_list_repeated; | ||||
|  | ||||
| template<typename T, T V> struct gen_numeric_list_repeated<T, 1, V> { | ||||
|   typedef typename make_type_list<type2val<T, V> >::type type; | ||||
| }; | ||||
|  | ||||
| template<typename T, T V> struct gen_numeric_list_repeated<T, 2, V> { | ||||
|   typedef typename make_type_list<type2val<T, V>, type2val<T, V> >::type type; | ||||
| }; | ||||
|  | ||||
| template<typename T, T V> struct gen_numeric_list_repeated<T, 3, V> { | ||||
|   typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V> >::type type; | ||||
| }; | ||||
|  | ||||
| template<typename T, T V> struct gen_numeric_list_repeated<T, 4, V> { | ||||
|   typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V>, type2val<T, V> >::type type; | ||||
| }; | ||||
|  | ||||
| template<typename T, T V> struct gen_numeric_list_repeated<T, 5, V> { | ||||
|   typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V>, type2val<T, V>, type2val<T, V> >::type type; | ||||
| }; | ||||
|  | ||||
| template<typename T, T V> struct gen_numeric_list_repeated<T, 6, V> { | ||||
|   typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V>, | ||||
|                                   type2val<T, V>, type2val<T, V>, type2val<T, V> >::type type; | ||||
| }; | ||||
|  | ||||
| template<typename T, T V> struct gen_numeric_list_repeated<T, 7, V> { | ||||
|   typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V>, | ||||
|                                   type2val<T, V>, type2val<T, V>, type2val<T, V>, | ||||
|                                   type2val<T, V> >::type type; | ||||
| }; | ||||
|  | ||||
| template<typename T, T V> struct gen_numeric_list_repeated<T, 8, V> { | ||||
|   typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V>, | ||||
|                                   type2val<T, V>, type2val<T, V>, type2val<T, V>, | ||||
|                                   type2val<T, V>, type2val<T, V> >::type type; | ||||
| }; | ||||
|  | ||||
|  | ||||
| template <std::size_t index, class NList> struct get; | ||||
|  | ||||
| template <std::size_t i> | ||||
| struct get<i, empty_list> | ||||
| { | ||||
|   get() { eigen_assert(false && "index overflow"); } | ||||
|   typedef void type; | ||||
|   static const char value = '\0'; | ||||
| }; | ||||
|  | ||||
| template <std::size_t i, class Head> | ||||
| struct get<i, type_list<Head, empty_list> > | ||||
| { | ||||
|   get() { eigen_assert(false && "index overflow"); } | ||||
|   typedef void type; | ||||
|   static const char value = '\0'; | ||||
| }; | ||||
|  | ||||
| template <class Head> | ||||
| struct get<0, type_list<Head, empty_list> > | ||||
| { | ||||
|   typedef typename Head::type type; | ||||
|   static const type value = Head::value; | ||||
| }; | ||||
|  | ||||
| template <class Head, class Tail> | ||||
| struct get<0, type_list<Head, Tail> > | ||||
| { | ||||
|   typedef typename Head::type type; | ||||
|   static const type value = Head::value; | ||||
| }; | ||||
|  | ||||
| template <std::size_t i, class Head, class Tail> | ||||
| struct get<i, type_list<Head, Tail> > | ||||
| { | ||||
|   typedef typename Tail::HeadType::type type; | ||||
|   static const type value = get<i-1, Tail>::value; | ||||
| }; | ||||
|  | ||||
|  | ||||
| template <class NList> struct arg_prod { | ||||
|   static const typename NList::HeadType::type value = get<0, NList>::value * arg_prod<typename NList::TailType>::value; | ||||
| }; | ||||
| template <> struct arg_prod<empty_list> { | ||||
|   static const int value = 1; | ||||
| }; | ||||
|  | ||||
|  | ||||
| template<int n, typename t> | ||||
| array<t, n> repeat(t v) { | ||||
|   array<t, n> arr; | ||||
|   arr.fill(v); | ||||
|   return arr; | ||||
| } | ||||
|  | ||||
| template<std::size_t I, class Head, class Tail> | ||||
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Head::type array_get(type_list<Head, Tail>&) { | ||||
|   return get<I, type_list<Head, Tail> >::value; | ||||
| } | ||||
| template<std::size_t I, class Head, class Tail> | ||||
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Head::type array_get(const type_list<Head, Tail>&) { | ||||
|   return get<I, type_list<Head, Tail> >::value; | ||||
| } | ||||
|  | ||||
| template <class NList> | ||||
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NList::HeadType::type array_prod(const NList&) { | ||||
|   return arg_prod<NList>::value; | ||||
| } | ||||
|  | ||||
| template<typename t, std::size_t n> | ||||
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const array<t, n>& a) { | ||||
|   t prod = 1; | ||||
|   for (size_t i = 0; i < n; ++i) { prod *= a[i]; } | ||||
|   return prod; | ||||
| } | ||||
| template<typename t> | ||||
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const array<t, 0>& /*a*/) { | ||||
|   return 1; | ||||
| } | ||||
|  | ||||
| template<typename t> | ||||
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const std::vector<t>& a) { | ||||
|   eigen_assert(a.size() > 0); | ||||
|   t prod = 1; | ||||
|   for (size_t i = 0; i < a.size(); ++i) { prod *= a[i]; } | ||||
|   return prod; | ||||
| } | ||||
|  | ||||
|  | ||||
| template<std::size_t I, class T> | ||||
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T& array_get(std::vector<T>& a) { | ||||
|   return a[I]; | ||||
| } | ||||
| template<std::size_t I, class T> | ||||
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& array_get(const std::vector<T>& a) { | ||||
|   return a[I]; | ||||
| } | ||||
|  | ||||
| struct sum_op { | ||||
|   // Emulation mode has no decltype, so approximate the result type by A. | ||||
|   template<typename A, typename B> static inline A run(A a, B b) { return a + b; } | ||||
| }; | ||||
| struct product_op { | ||||
|   template<typename A, typename B> static inline A run(A a, B b) { return a * b; } | ||||
| }; | ||||
|  | ||||
| struct logical_and_op { | ||||
|   template<typename A, typename B> static inline bool run(A a, B b) { return a && b; } | ||||
| }; | ||||
| struct logical_or_op { | ||||
|   template<typename A, typename B> static inline bool run(A a, B b) { return a || b; } | ||||
| }; | ||||
|  | ||||
| struct equal_op { | ||||
|   template<typename A, typename B> static inline bool run(A a, B b) { return a == b; } | ||||
| }; | ||||
| struct not_equal_op { | ||||
|   template<typename A, typename B> static inline bool run(A a, B b) { return a != b; } | ||||
| }; | ||||
| struct lesser_op { | ||||
|   template<typename A, typename B> static inline bool run(A a, B b) { return a < b; } | ||||
| }; | ||||
| struct lesser_equal_op { | ||||
|   template<typename A, typename B> static inline bool run(A a, B b) { return a <= b; } | ||||
| }; | ||||
|  | ||||
| struct greater_op { | ||||
|   template<typename A, typename B> static inline bool run(A a, B b) { return a > b; } | ||||
| }; | ||||
| struct greater_equal_op { | ||||
|   template<typename A, typename B> static inline bool run(A a, B b) { return a >= b; } | ||||
| }; | ||||
|  | ||||
| struct not_op { | ||||
|   template<typename A> static inline bool run(A a) { return !a; } | ||||
| }; | ||||
| struct negation_op { | ||||
|   template<typename A> static inline A run(A a) { return -a; } | ||||
| }; | ||||
| struct greater_equal_zero_op { | ||||
|   template<typename A> static inline bool run(A a) { return a >= 0; } | ||||
| }; | ||||
|  | ||||
|  | ||||
| template<typename Reducer, typename Op, typename A, std::size_t N> | ||||
| struct ArrayApplyAndReduce { | ||||
|   static inline bool run(const array<A, N>& a) { | ||||
|     EIGEN_STATIC_ASSERT(N >= 2, YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|     bool result = Reducer::run(Op::run(a[0]), Op::run(a[1])); | ||||
|     for (size_t i = 2; i < N; ++i) { | ||||
|       result = Reducer::run(result, Op::run(a[i])); | ||||
|     } | ||||
|     return result; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<typename Reducer, typename Op, typename A> | ||||
| struct ArrayApplyAndReduce<Reducer, Op, A, 1>  { | ||||
|   static inline bool run(const array<A, 1>& a) { | ||||
|     return Op::run(a[0]); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<typename Reducer, typename Op, typename A, std::size_t N> | ||||
| inline bool array_apply_and_reduce(const array<A, N>& a) { | ||||
|   return ArrayApplyAndReduce<Reducer, Op, A, N>::run(a); | ||||
| } | ||||
|  | ||||
| template<typename Reducer, typename Op, typename A, typename B, std::size_t N> | ||||
| struct ArrayZipAndReduce { | ||||
|   static inline bool run(const array<A, N>& a, const array<B, N>& b) { | ||||
|     EIGEN_STATIC_ASSERT(N >= 2, YOU_MADE_A_PROGRAMMING_MISTAKE); | ||||
|     bool result = Reducer::run(Op::run(a[0], b[0]), Op::run(a[1], b[1])); | ||||
|     for (size_t i = 2; i < N; ++i) { | ||||
|       result = Reducer::run(result, Op::run(a[i], b[i])); | ||||
|     } | ||||
|     return result; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<typename Reducer, typename Op, typename A, typename B> | ||||
| struct ArrayZipAndReduce<Reducer, Op, A, B, 1> { | ||||
|   static inline bool run(const array<A, 1>& a, const array<B, 1>& b) { | ||||
|     return Op::run(a[0], b[0]); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<typename Reducer, typename Op, typename A, typename B, std::size_t N> | ||||
| inline bool array_zip_and_reduce(const array<A, N>& a, const array<B, N>& b) { | ||||
|   return ArrayZipAndReduce<Reducer, Op, A, B, N>::run(a, b); | ||||
| } | ||||
|  | ||||
| }  // end namespace internal | ||||
|  | ||||
| }  // end namespace Eigen | ||||
|  | ||||
|  | ||||
|  | ||||
| #endif  // EIGEN_EMULATE_CXX11_META_H | ||||
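These fixed-arity emulations are what the C++11 metaprogramming layer falls back to on pre-C++11 compilers and nvcc, and calling code is written so that it compiles against either CXX11Meta.h or this file. As an illustration, an element-wise shape comparison can be expressed as an AND-reduction over per-element equality; same_shape is a made-up name for this sketch, not an Eigen function.

    // AND-reduce an element-wise equality test over two fixed-size arrays.
    #include <unsupported/Eigen/CXX11/Tensor>

    bool same_shape(const Eigen::array<int, 3>& a, const Eigen::array<int, 3>& b) {
      return Eigen::internal::array_zip_and_reduce<
          Eigen::internal::logical_and_op, Eigen::internal::equal_op>(a, b);
    }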
141 external/include/eigen3/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h vendored Normal file
							| @@ -0,0 +1,141 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_FIXEDSIZEVECTOR_H | ||||
| #define EIGEN_FIXEDSIZEVECTOR_H | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** \class MaxSizeVector | ||||
|   * \ingroup Core | ||||
|   * | ||||
|   * \brief The MaxSizeVector class. | ||||
|   * | ||||
|   * The %MaxSizeVector provides a subset of std::vector functionality. | ||||
|   * | ||||
|   * The goal is to provide basic std::vector operations when using | ||||
|   * std::vector is not an option (e.g. on GPU or when compiling using | ||||
|   * FMA/AVX, as this can cause either compilation failures or illegal | ||||
|   * instruction failures). | ||||
|   * | ||||
|   * Beware: The constructors are not API compatible with those of | ||||
|   * std::vector. | ||||
|   */ | ||||
| template <typename T> | ||||
| class MaxSizeVector { | ||||
|  public: | ||||
|   // Construct a new MaxSizeVector, reserve n elements. | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   explicit MaxSizeVector(size_t n) | ||||
|       : reserve_(n), size_(0), | ||||
|         data_(static_cast<T*>(internal::aligned_malloc(n * sizeof(T)))) { | ||||
|     for (size_t i = 0; i < n; ++i) { new (&data_[i]) T; } | ||||
|   } | ||||
|  | ||||
|   // Construct a new MaxSizeVector, reserve and resize to n. | ||||
|   // Copy the init value to all elements. | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   MaxSizeVector(size_t n, const T& init) | ||||
|       : reserve_(n), size_(n), | ||||
|         data_(static_cast<T*>(internal::aligned_malloc(n * sizeof(T)))) { | ||||
|     for (size_t i = 0; i < n; ++i) { new (&data_[i]) T(init); } | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   ~MaxSizeVector() { | ||||
|     for (size_t i = 0; i < size_; ++i) { | ||||
|       data_[i].~T(); | ||||
|     } | ||||
|     internal::aligned_free(data_); | ||||
|   } | ||||
|  | ||||
|   void resize(size_t n) { | ||||
|     eigen_assert(n <= reserve_); | ||||
|     for (size_t i = size_; i < n; ++i) { | ||||
|       new (&data_[i]) T; | ||||
|     } | ||||
|     for (size_t i = n; i < size_; ++i) { | ||||
|       data_[i].~T(); | ||||
|     } | ||||
|     size_ = n; | ||||
|   } | ||||
|  | ||||
|   // Append new elements (up to reserved size). | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   void push_back(const T& t) { | ||||
|     eigen_assert(size_ < reserve_); | ||||
|     data_[size_++] = t; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   const T& operator[] (size_t i) const { | ||||
|     eigen_assert(i < size_); | ||||
|     return data_[i]; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   T& operator[] (size_t i) { | ||||
|     eigen_assert(i < size_); | ||||
|     return data_[i]; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   T& back() { | ||||
|     eigen_assert(size_ > 0); | ||||
|     return data_[size_ - 1]; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   const T& back() const { | ||||
|     eigen_assert(size_ > 0); | ||||
|     return data_[size_ - 1]; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   void pop_back() { | ||||
|     // NOTE: This does not destroy the value at the end the way | ||||
|     // std::vector's version of pop_back() does.  That happens when | ||||
|     // the Vector is destroyed. | ||||
|     eigen_assert(size_ > 0); | ||||
|     size_--; | ||||
|   } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   size_t size() const { return size_; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   bool empty() const { return size_ == 0; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   T* data() { return data_; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   const T* data() const { return data_; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   T* begin() { return data_; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   T* end() { return data_ + size_; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   const T* begin() const { return data_; } | ||||
|  | ||||
|   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE | ||||
|   const T* end() const { return data_ + size_; } | ||||
|  | ||||
|  private: | ||||
|   size_t reserve_; | ||||
|   size_t size_; | ||||
|   T* data_; | ||||
| }; | ||||
|  | ||||
| }  // namespace Eigen | ||||
|  | ||||
| #endif  // EIGEN_FIXEDSIZEVECTOR_H | ||||
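Editor's note: a minimal usage sketch for the MaxSizeVector header above (not part of the vendored file). The direct include path is an assumption based on where the diff places the header; adjust it to wherever the vendored eigen3 tree sits on your include path.

#include <Eigen/Core>
// Direct include of the header shown above (path assumed from this diff).
#include <unsupported/Eigen/CXX11/src/util/MaxSizeVector.h>
#include <iostream>

int main() {
  Eigen::MaxSizeVector<int> v(4);   // reserves room for 4 elements; size() starts at 0
  v.push_back(10);
  v.push_back(20);
  v.push_back(30);                  // size() == 3, capacity stays fixed at 4
  for (size_t i = 0; i < v.size(); ++i)
    std::cout << v[i] << ' ';
  std::cout << '\n';
  v.pop_back();                     // shrinks size() to 2; as noted above, the element is destroyed later
  return 0;
}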
43 external/include/eigen3/unsupported/Eigen/EulerAngles vendored Normal file
							| @@ -0,0 +1,43 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2015 Tal Hadad <tal_hd@hotmail.com> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_EULERANGLES_MODULE_H | ||||
| #define EIGEN_EULERANGLES_MODULE_H | ||||
|  | ||||
|  | ||||
| #include "Eigen/Core" | ||||
| #include "Eigen/Geometry" | ||||
|  | ||||
| #include "Eigen/src/Core/util/DisableStupidWarnings.h" | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** | ||||
|   * \defgroup EulerAngles_Module EulerAngles module | ||||
|   * \brief This module provides generic Euler angle rotations. | ||||
|   * | ||||
|   * Euler angles are a way to represent 3D rotation. | ||||
|   * | ||||
|   * In order to use this module in your code, include this header: | ||||
|   * \code | ||||
|   * #include <unsupported/Eigen/EulerAngles> | ||||
|   * \endcode | ||||
|   * | ||||
|   * See \ref EulerAngles for more information. | ||||
|   * | ||||
|   */ | ||||
|  | ||||
| } | ||||
|  | ||||
| #include "src/EulerAngles/EulerSystem.h" | ||||
| #include "src/EulerAngles/EulerAngles.h" | ||||
|  | ||||
| #include "Eigen/src/Core/util/ReenableStupidWarnings.h" | ||||
|  | ||||
| #endif // EIGEN_EULERANGLES_MODULE_H | ||||
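Editor's note: a short sketch of round-tripping between Euler angles and a rotation matrix with this module. The EulerAnglesZYXd typedef and the matrix constructor are assumed from the module's own source files, which are included above but not shown in this diff.

#include <unsupported/Eigen/EulerAngles>
#include <iostream>

int main() {
  // For the ZYX convention, alpha, beta, gamma are rotations about Z, Y and X respectively.
  Eigen::EulerAnglesZYXd ea(0.5, 0.1, -0.3);
  Eigen::Matrix3d R = ea.toRotationMatrix();   // equivalent rotation matrix
  Eigen::EulerAnglesZYXd back(R);              // recover the angles from the matrix
  std::cout << back.angles().transpose() << std::endl;
  return 0;
}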
419 external/include/eigen3/unsupported/Eigen/FFT vendored Normal file
							| @@ -0,0 +1,419 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra.  | ||||
| // | ||||
| // Copyright (C) 2009 Mark Borgerding mark a borgerding net | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_FFT_H | ||||
| #define EIGEN_FFT_H | ||||
|  | ||||
| #include <complex> | ||||
| #include <vector> | ||||
| #include <map> | ||||
| #include <Eigen/Core> | ||||
|  | ||||
|  | ||||
| /** | ||||
|   * \defgroup FFT_Module Fast Fourier Transform module | ||||
|   * | ||||
|   * \code | ||||
|   * #include <unsupported/Eigen/FFT> | ||||
|   * \endcode | ||||
|   * | ||||
|   * This module provides Fast Fourier transformation, with a configurable backend | ||||
|   * implementation. | ||||
|   * | ||||
|   * The default implementation is based on kissfft. It is a small, free, and | ||||
|   * reasonably efficient default. | ||||
|   * | ||||
|   * There are currently two implementation backends: | ||||
|   * | ||||
|   * - fftw (http://www.fftw.org) : faster, GPL -- incompatible with Eigen in LGPL form, bigger code size. | ||||
|   * - MKL (http://en.wikipedia.org/wiki/Math_Kernel_Library) : fastest, commercial -- may be incompatible with Eigen in GPL form. | ||||
|   * | ||||
|   * \section FFTDesign Design | ||||
|   * | ||||
|   * The following design decisions were made concerning scaling and | ||||
|   * half-spectrum for real FFT. | ||||
|   * | ||||
|   * The intent is to facilitate generic programming and ease migrating code | ||||
|   * from Matlab/Octave. | ||||
|   * We think the default behavior of Eigen/FFT should favor correctness and | ||||
|   * generality over speed. Of course, the caller should be able to "opt-out" from this | ||||
|   * behavior and get the speed increase if they want it. | ||||
|   * | ||||
|   * 1) %Scaling: | ||||
|   * Other libraries (FFTW, IMKL, KISSFFT) do not perform scaling, so there | ||||
|   * is a constant gain incurred after the forward & inverse transforms, i.e. | ||||
|   * IFFT(FFT(x)) = Kx; this is done to avoid a vector-by-scalar multiply. | ||||
|   * The downside is that algorithms that worked correctly in Matlab/Octave | ||||
|   * don't behave the same way once implemented in C++. | ||||
|   * | ||||
|   * How Eigen/FFT differs: invertible scaling is performed so IFFT( FFT(x) ) = x.  | ||||
|   * | ||||
|   * 2) Real FFT half-spectrum | ||||
|   * Other libraries use only half the frequency spectrum (plus one extra | ||||
|   * sample for the Nyquist bin) for a real FFT; the other half is the | ||||
|   * conjugate-symmetric image of the first half.  This saves them a copy and | ||||
|   * some memory.  The downside is that the caller needs special logic for the | ||||
|   * number of bins in complex vs. real transforms. | ||||
|   * | ||||
|   * How Eigen/FFT differs: The full spectrum is returned from the forward  | ||||
|   * transform.  This facilitates generic template programming by obviating  | ||||
|   * separate specializations for real vs complex.  On the inverse | ||||
|   * transform, only half the spectrum is actually used if the output type is real. | ||||
|   */ | ||||
|   | ||||
|  | ||||
| #ifdef EIGEN_FFTW_DEFAULT | ||||
| // FFTW: faster, GPL -- incompatible with Eigen in LGPL form, bigger code size | ||||
| #  include <fftw3.h> | ||||
| #  include "src/FFT/ei_fftw_impl.h" | ||||
|    namespace Eigen { | ||||
|      //template <typename T> typedef struct internal::fftw_impl  default_fft_impl; this does not work | ||||
|      template <typename T> struct default_fft_impl : public internal::fftw_impl<T> {}; | ||||
|    } | ||||
| #elif defined EIGEN_MKL_DEFAULT | ||||
| // TODO  | ||||
| // intel Math Kernel Library: fastest, commercial -- may be incompatible with Eigen in GPL form | ||||
| #  include "src/FFT/ei_imklfft_impl.h" | ||||
|    namespace Eigen { | ||||
|      template <typename T> struct default_fft_impl : public internal::imklfft_impl {}; | ||||
|    } | ||||
| #else | ||||
| // internal::kissfft_impl:  small, free, reasonably efficient default, derived from kissfft | ||||
| // | ||||
| # include "src/FFT/ei_kissfft_impl.h" | ||||
|   namespace Eigen { | ||||
|      template <typename T>  | ||||
|        struct default_fft_impl : public internal::kissfft_impl<T> {}; | ||||
|   } | ||||
| #endif | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
|   | ||||
| //  | ||||
| template<typename T_SrcMat,typename T_FftIfc> struct fft_fwd_proxy; | ||||
| template<typename T_SrcMat,typename T_FftIfc> struct fft_inv_proxy; | ||||
|  | ||||
| namespace internal { | ||||
| template<typename T_SrcMat,typename T_FftIfc> | ||||
| struct traits< fft_fwd_proxy<T_SrcMat,T_FftIfc> > | ||||
| { | ||||
|   typedef typename T_SrcMat::PlainObject ReturnType; | ||||
| }; | ||||
| template<typename T_SrcMat,typename T_FftIfc> | ||||
| struct traits< fft_inv_proxy<T_SrcMat,T_FftIfc> > | ||||
| { | ||||
|   typedef typename T_SrcMat::PlainObject ReturnType; | ||||
| }; | ||||
| } | ||||
|  | ||||
| template<typename T_SrcMat,typename T_FftIfc>  | ||||
| struct fft_fwd_proxy | ||||
|  : public ReturnByValue<fft_fwd_proxy<T_SrcMat,T_FftIfc> > | ||||
| { | ||||
|   typedef DenseIndex Index; | ||||
|  | ||||
|   fft_fwd_proxy(const T_SrcMat& src,T_FftIfc & fft, Index nfft) : m_src(src),m_ifc(fft), m_nfft(nfft) {} | ||||
|  | ||||
|   template<typename T_DestMat> void evalTo(T_DestMat& dst) const; | ||||
|  | ||||
|   Index rows() const { return m_src.rows(); } | ||||
|   Index cols() const { return m_src.cols(); } | ||||
| protected: | ||||
|   const T_SrcMat & m_src; | ||||
|   T_FftIfc & m_ifc; | ||||
|   Index m_nfft; | ||||
| private: | ||||
|   fft_fwd_proxy& operator=(const fft_fwd_proxy&); | ||||
| }; | ||||
|  | ||||
| template<typename T_SrcMat,typename T_FftIfc>  | ||||
| struct fft_inv_proxy | ||||
|  : public ReturnByValue<fft_inv_proxy<T_SrcMat,T_FftIfc> > | ||||
| { | ||||
|   typedef DenseIndex Index; | ||||
|  | ||||
|   fft_inv_proxy(const T_SrcMat& src,T_FftIfc & fft, Index nfft) : m_src(src),m_ifc(fft), m_nfft(nfft) {} | ||||
|  | ||||
|   template<typename T_DestMat> void evalTo(T_DestMat& dst) const; | ||||
|  | ||||
|   Index rows() const { return m_src.rows(); } | ||||
|   Index cols() const { return m_src.cols(); } | ||||
| protected: | ||||
|   const T_SrcMat & m_src; | ||||
|   T_FftIfc & m_ifc; | ||||
|   Index m_nfft; | ||||
| private: | ||||
|   fft_inv_proxy& operator=(const fft_inv_proxy&); | ||||
| }; | ||||
|  | ||||
|  | ||||
| template <typename T_Scalar, | ||||
|          typename T_Impl=default_fft_impl<T_Scalar> > | ||||
| class FFT | ||||
| { | ||||
|   public: | ||||
|     typedef T_Impl impl_type; | ||||
|     typedef DenseIndex Index; | ||||
|     typedef typename impl_type::Scalar Scalar; | ||||
|     typedef typename impl_type::Complex Complex; | ||||
|  | ||||
|     enum Flag { | ||||
|       Default=0, // goof proof | ||||
|       Unscaled=1, | ||||
|       HalfSpectrum=2, | ||||
|       // SomeOtherSpeedOptimization=4 | ||||
|       Speedy=32767 | ||||
|     }; | ||||
|  | ||||
|     FFT( const impl_type & impl=impl_type() , Flag flags=Default ) :m_impl(impl),m_flag(flags) { } | ||||
|  | ||||
|     inline | ||||
|     bool HasFlag(Flag f) const { return (m_flag & (int)f) == f;} | ||||
|  | ||||
|     inline | ||||
|     void SetFlag(Flag f) { m_flag |= (int)f;} | ||||
|  | ||||
|     inline | ||||
|     void ClearFlag(Flag f) { m_flag &= (~(int)f);} | ||||
|  | ||||
|     inline | ||||
|     void fwd( Complex * dst, const Scalar * src, Index nfft) | ||||
|     { | ||||
|         m_impl.fwd(dst,src,static_cast<int>(nfft)); | ||||
|         if ( HasFlag(HalfSpectrum) == false) | ||||
|           ReflectSpectrum(dst,nfft); | ||||
|     } | ||||
|  | ||||
|     inline | ||||
|     void fwd( Complex * dst, const Complex * src, Index nfft) | ||||
|     { | ||||
|         m_impl.fwd(dst,src,static_cast<int>(nfft)); | ||||
|     } | ||||
|  | ||||
|     /* | ||||
|     inline  | ||||
|     void fwd2(Complex * dst, const Complex * src, int n0,int n1) | ||||
|     { | ||||
|       m_impl.fwd2(dst,src,n0,n1); | ||||
|     } | ||||
|     */ | ||||
|  | ||||
|     template <typename _Input> | ||||
|     inline | ||||
|     void fwd( std::vector<Complex> & dst, const std::vector<_Input> & src)  | ||||
|     { | ||||
|       if ( NumTraits<_Input>::IsComplex == 0 && HasFlag(HalfSpectrum) ) | ||||
|         dst.resize( (src.size()>>1)+1); // half the bins + Nyquist bin | ||||
|       else | ||||
|         dst.resize(src.size()); | ||||
|       fwd(&dst[0],&src[0],src.size()); | ||||
|     } | ||||
|  | ||||
|     template<typename InputDerived, typename ComplexDerived> | ||||
|     inline | ||||
|     void fwd( MatrixBase<ComplexDerived> & dst, const MatrixBase<InputDerived> & src, Index nfft=-1) | ||||
|     { | ||||
|       typedef typename ComplexDerived::Scalar dst_type; | ||||
|       typedef typename InputDerived::Scalar src_type; | ||||
|       EIGEN_STATIC_ASSERT_VECTOR_ONLY(InputDerived) | ||||
|       EIGEN_STATIC_ASSERT_VECTOR_ONLY(ComplexDerived) | ||||
|       EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(ComplexDerived,InputDerived) // size at compile-time | ||||
|       EIGEN_STATIC_ASSERT((internal::is_same<dst_type, Complex>::value), | ||||
|             YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) | ||||
|       EIGEN_STATIC_ASSERT(int(InputDerived::Flags)&int(ComplexDerived::Flags)&DirectAccessBit, | ||||
|             THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES) | ||||
|  | ||||
|       if (nfft<1) | ||||
|         nfft = src.size(); | ||||
|  | ||||
|       if ( NumTraits< src_type >::IsComplex == 0 && HasFlag(HalfSpectrum) ) | ||||
|         dst.derived().resize( (nfft>>1)+1); | ||||
|       else | ||||
|         dst.derived().resize(nfft); | ||||
|  | ||||
|       if ( src.innerStride() != 1 || src.size() < nfft ) { | ||||
|         Matrix<src_type,1,Dynamic> tmp; | ||||
|         if (src.size()<nfft) { | ||||
|           tmp.setZero(nfft); | ||||
|           tmp.block(0,0,src.size(),1 ) = src; | ||||
|         }else{ | ||||
|           tmp = src; | ||||
|         } | ||||
|         fwd( &dst[0],&tmp[0],nfft ); | ||||
|       }else{ | ||||
|         fwd( &dst[0],&src[0],nfft ); | ||||
|       } | ||||
|     } | ||||
|   | ||||
|     template<typename InputDerived> | ||||
|     inline | ||||
|     fft_fwd_proxy< MatrixBase<InputDerived>, FFT<T_Scalar,T_Impl> > | ||||
|     fwd( const MatrixBase<InputDerived> & src, Index nfft=-1) | ||||
|     { | ||||
|       return fft_fwd_proxy< MatrixBase<InputDerived> ,FFT<T_Scalar,T_Impl> >( src, *this,nfft ); | ||||
|     } | ||||
|  | ||||
|     template<typename InputDerived> | ||||
|     inline | ||||
|     fft_inv_proxy< MatrixBase<InputDerived>, FFT<T_Scalar,T_Impl> > | ||||
|     inv( const MatrixBase<InputDerived> & src, Index nfft=-1) | ||||
|     { | ||||
|       return  fft_inv_proxy< MatrixBase<InputDerived> ,FFT<T_Scalar,T_Impl> >( src, *this,nfft ); | ||||
|     } | ||||
|  | ||||
|     inline | ||||
|     void inv( Complex * dst, const Complex * src, Index nfft) | ||||
|     { | ||||
|       m_impl.inv( dst,src,static_cast<int>(nfft) ); | ||||
|       if ( HasFlag( Unscaled ) == false) | ||||
|         scale(dst,Scalar(1./nfft),nfft); // scale the time series | ||||
|     } | ||||
|  | ||||
|     inline | ||||
|     void inv( Scalar * dst, const Complex * src, Index nfft) | ||||
|     { | ||||
|       m_impl.inv( dst,src,static_cast<int>(nfft) ); | ||||
|       if ( HasFlag( Unscaled ) == false) | ||||
|         scale(dst,Scalar(1./nfft),nfft); // scale the time series | ||||
|     } | ||||
|  | ||||
|     template<typename OutputDerived, typename ComplexDerived> | ||||
|     inline | ||||
|     void inv( MatrixBase<OutputDerived> & dst, const MatrixBase<ComplexDerived> & src, Index nfft=-1) | ||||
|     { | ||||
|       typedef typename ComplexDerived::Scalar src_type; | ||||
|       typedef typename ComplexDerived::RealScalar real_type; | ||||
|       typedef typename OutputDerived::Scalar dst_type; | ||||
|       const bool realfft= (NumTraits<dst_type>::IsComplex == 0); | ||||
|       EIGEN_STATIC_ASSERT_VECTOR_ONLY(OutputDerived) | ||||
|       EIGEN_STATIC_ASSERT_VECTOR_ONLY(ComplexDerived) | ||||
|       EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(ComplexDerived,OutputDerived) // size at compile-time | ||||
|       EIGEN_STATIC_ASSERT((internal::is_same<src_type, Complex>::value), | ||||
|             YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) | ||||
|       EIGEN_STATIC_ASSERT(int(OutputDerived::Flags)&int(ComplexDerived::Flags)&DirectAccessBit, | ||||
|             THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES) | ||||
|  | ||||
|       if (nfft<1) { //automatic FFT size determination | ||||
|         if ( realfft && HasFlag(HalfSpectrum) )  | ||||
|           nfft = 2*(src.size()-1); //assume even fft size | ||||
|         else | ||||
|           nfft = src.size(); | ||||
|       } | ||||
|       dst.derived().resize( nfft ); | ||||
|  | ||||
|       // check for nfft that does not fit the input data size | ||||
|       Index resize_input= ( realfft && HasFlag(HalfSpectrum) ) | ||||
|         ? ( (nfft/2+1) - src.size() ) | ||||
|         : ( nfft - src.size() ); | ||||
|  | ||||
|       if ( src.innerStride() != 1 || resize_input ) { | ||||
|         // if the vector is strided, then we need to copy it to a packed temporary | ||||
|         Matrix<src_type,1,Dynamic> tmp; | ||||
|         if ( resize_input ) { | ||||
|           size_t ncopy = (std::min)(src.size(),src.size() + resize_input); | ||||
|           tmp.setZero(src.size() + resize_input); | ||||
|           if ( realfft && HasFlag(HalfSpectrum) ) { | ||||
|             // pad at the Nyquist bin | ||||
|             tmp.head(ncopy) = src.head(ncopy); | ||||
|             tmp(ncopy-1) = real(tmp(ncopy-1)); // enforce real-only Nyquist bin | ||||
|           }else{ | ||||
|             size_t nhead,ntail; | ||||
|             nhead = 1+ncopy/2-1; // range  [0:pi) | ||||
|             ntail = ncopy/2-1;   // range (-pi:0) | ||||
|             tmp.head(nhead) = src.head(nhead); | ||||
|             tmp.tail(ntail) = src.tail(ntail); | ||||
|             if (resize_input<0) { //shrinking -- create the Nyquist bin as the average of the two bins that fold into it | ||||
|               tmp(nhead) = ( src(nfft/2) + src( src.size() - nfft/2 ) )*real_type(.5); | ||||
|             }else{ // expanding -- split the old Nyquist bin into two halves | ||||
|               tmp(nhead) = src(nhead) * real_type(.5); | ||||
|               tmp(tmp.size()-nhead) = tmp(nhead); | ||||
|             } | ||||
|           } | ||||
|         }else{ | ||||
|           tmp = src; | ||||
|         } | ||||
|         inv( &dst[0],&tmp[0], nfft); | ||||
|       }else{ | ||||
|         inv( &dst[0],&src[0], nfft); | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     template <typename _Output> | ||||
|     inline | ||||
|     void inv( std::vector<_Output> & dst, const std::vector<Complex> & src,Index nfft=-1) | ||||
|     { | ||||
|       if (nfft<1) | ||||
|         nfft = ( NumTraits<_Output>::IsComplex == 0 && HasFlag(HalfSpectrum) ) ? 2*(src.size()-1) : src.size(); | ||||
|       dst.resize( nfft ); | ||||
|       inv( &dst[0],&src[0],nfft); | ||||
|     } | ||||
|  | ||||
|  | ||||
|     /* | ||||
|     // TODO: multi-dimensional FFTs | ||||
|     inline  | ||||
|     void inv2(Complex * dst, const Complex * src, int n0,int n1) | ||||
|     { | ||||
|       m_impl.inv2(dst,src,n0,n1); | ||||
|       if ( HasFlag( Unscaled ) == false) | ||||
|           scale(dst,1./(n0*n1),n0*n1); | ||||
|     } | ||||
|   */ | ||||
|  | ||||
|     inline | ||||
|     impl_type & impl() {return m_impl;} | ||||
|   private: | ||||
|  | ||||
|     template <typename T_Data> | ||||
|     inline | ||||
|     void scale(T_Data * x,Scalar s,Index nx) | ||||
|     { | ||||
| #if 1 | ||||
|       for (int k=0;k<nx;++k) | ||||
|         *x++ *= s; | ||||
| #else | ||||
|       if ( ((ptrdiff_t)x) & 15 ) | ||||
|         Matrix<T_Data, Dynamic, 1>::Map(x,nx) *= s; | ||||
|       else | ||||
|         Matrix<T_Data, Dynamic, 1>::MapAligned(x,nx) *= s; | ||||
|          //Matrix<T_Data, Dynamic, Dynamic>::Map(x,nx) * s; | ||||
| #endif   | ||||
|     } | ||||
|  | ||||
|     inline | ||||
|     void ReflectSpectrum(Complex * freq, Index nfft) | ||||
|     { | ||||
|       // create the implicit right-half spectrum (conjugate-mirror of the left-half) | ||||
|       Index nhbins=(nfft>>1)+1; | ||||
|       for (Index k=nhbins;k < nfft; ++k ) | ||||
|         freq[k] = conj(freq[nfft-k]); | ||||
|     } | ||||
|  | ||||
|     impl_type m_impl; | ||||
|     int m_flag; | ||||
| }; | ||||
|  | ||||
| template<typename T_SrcMat,typename T_FftIfc>  | ||||
| template<typename T_DestMat> inline  | ||||
| void fft_fwd_proxy<T_SrcMat,T_FftIfc>::evalTo(T_DestMat& dst) const | ||||
| { | ||||
|     m_ifc.fwd( dst, m_src, m_nfft); | ||||
| } | ||||
|  | ||||
| template<typename T_SrcMat,typename T_FftIfc>  | ||||
| template<typename T_DestMat> inline  | ||||
| void fft_inv_proxy<T_SrcMat,T_FftIfc>::evalTo(T_DestMat& dst) const | ||||
| { | ||||
|     m_ifc.inv( dst, m_src, m_nfft); | ||||
| } | ||||
|  | ||||
| } | ||||
| #endif | ||||
| /* vim: set filetype=cpp et sw=2 ts=2 ai: */ | ||||
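Editor's note: a minimal forward/inverse round trip with the FFT front end above, illustrating the default scaling and full-spectrum behaviour described in the design notes. The kissfft backend is assumed here since neither EIGEN_FFTW_DEFAULT nor EIGEN_MKL_DEFAULT is defined.

#include <unsupported/Eigen/FFT>
#include <vector>
#include <complex>
#include <iostream>

int main() {
  Eigen::FFT<float> fft;

  std::vector<float> timevec(8);
  for (int k = 0; k < 8; ++k) timevec[k] = static_cast<float>(k);

  std::vector<std::complex<float> > freqvec;
  fft.fwd(freqvec, timevec);     // full spectrum by default (all 8 bins)

  std::vector<float> back;
  fft.inv(back, freqvec);        // scaled so that inv(fwd(x)) == x

  std::cout << back[3] << std::endl;   // ~3
  return 0;
}

To opt out of either default, use the flags defined in the header: fft.SetFlag(Eigen::FFT<float>::HalfSpectrum) keeps only the non-redundant half of a real spectrum, and fft.SetFlag(Eigen::FFT<float>::Unscaled) skips the 1/nfft scaling on the inverse transform.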
42 external/include/eigen3/unsupported/Eigen/IterativeSolvers vendored Normal file
							| @@ -0,0 +1,42 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_ITERATIVE_SOLVERS_MODULE_H | ||||
| #define EIGEN_ITERATIVE_SOLVERS_MODULE_H | ||||
|  | ||||
| #include <Eigen/Sparse> | ||||
|  | ||||
| /** | ||||
|   * \defgroup IterativeSolvers_Module Iterative solvers module | ||||
|   * This module aims to provide various iterative linear and non-linear solver algorithms. | ||||
|   * It currently provides: | ||||
|   *  - a constrained conjugate gradient | ||||
|   *  - a Householder GMRES implementation | ||||
|   * \code | ||||
|   * #include <unsupported/Eigen/IterativeSolvers> | ||||
|   * \endcode | ||||
|   */ | ||||
| //@{ | ||||
|  | ||||
| #ifndef EIGEN_MPL2_ONLY | ||||
| #include "src/IterativeSolvers/IterationController.h" | ||||
| #include "src/IterativeSolvers/ConstrainedConjGrad.h" | ||||
| #endif | ||||
|  | ||||
| #include "src/IterativeSolvers/IncompleteLU.h" | ||||
| #include "../../Eigen/Jacobi" | ||||
| #include "../../Eigen/Householder" | ||||
| #include "src/IterativeSolvers/GMRES.h" | ||||
| #include "src/IterativeSolvers/DGMRES.h" | ||||
| //#include "src/IterativeSolvers/SSORPreconditioner.h" | ||||
| #include "src/IterativeSolvers/MINRES.h" | ||||
|  | ||||
| //@} | ||||
|  | ||||
| #endif // EIGEN_ITERATIVE_SOLVERS_MODULE_H | ||||
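Editor's note: a sketch of solving a small sparse system with the GMRES solver pulled in by this module; the solver is assumed to follow Eigen's usual IterativeSolverBase-style compute/solve API, which is not shown in this diff.

#include <unsupported/Eigen/IterativeSolvers>
#include <Eigen/SparseCore>
#include <vector>
#include <iostream>

int main() {
  typedef Eigen::SparseMatrix<double> SpMat;
  typedef Eigen::Triplet<double> T;

  // Build a small sparse matrix from triplets.
  std::vector<T> coeffs;
  coeffs.push_back(T(0, 0, 4.0));
  coeffs.push_back(T(1, 1, 3.0));
  coeffs.push_back(T(2, 2, 2.0));
  coeffs.push_back(T(0, 1, 1.0));
  coeffs.push_back(T(1, 0, 1.0));

  SpMat A(3, 3);
  A.setFromTriplets(coeffs.begin(), coeffs.end());

  Eigen::VectorXd b(3);
  b << 1.0, 2.0, 3.0;

  Eigen::GMRES<SpMat> solver(A);
  Eigen::VectorXd x = solver.solve(b);
  std::cout << "#iterations: " << solver.iterations()
            << "  estimated error: " << solver.error() << std::endl;
  return 0;
}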
36 external/include/eigen3/unsupported/Eigen/KroneckerProduct vendored Normal file
							| @@ -0,0 +1,36 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_KRONECKER_PRODUCT_MODULE_H | ||||
| #define EIGEN_KRONECKER_PRODUCT_MODULE_H | ||||
|  | ||||
| #include "../../Eigen/Core" | ||||
|  | ||||
| #include "../../Eigen/src/Core/util/DisableStupidWarnings.h" | ||||
|  | ||||
| #include "../../Eigen/src/SparseCore/SparseUtil.h" | ||||
|  | ||||
| namespace Eigen { | ||||
|  | ||||
| /** | ||||
|   * \defgroup KroneckerProduct_Module KroneckerProduct module | ||||
|   * | ||||
|   * This module contains an experimental Kronecker product implementation. | ||||
|   * | ||||
|   * \code | ||||
|   * #include <Eigen/KroneckerProduct> | ||||
|   * \endcode | ||||
|   */ | ||||
|  | ||||
| } // namespace Eigen | ||||
|  | ||||
| #include "src/KroneckerProduct/KroneckerTensorProduct.h" | ||||
|  | ||||
| #include "../../Eigen/src/Core/util/ReenableStupidWarnings.h" | ||||
|  | ||||
| #endif // EIGEN_KRONECKER_PRODUCT_MODULE_H | ||||
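Editor's note: a minimal dense Kronecker product example using this module's kroneckerProduct() free function, which is assumed from the module source included above and not shown in this diff.

#include <unsupported/Eigen/KroneckerProduct>
#include <iostream>

int main() {
  Eigen::Matrix2d A, B;
  A << 1, 2,
       3, 4;
  B << 0, 1,
       1, 0;
  Eigen::MatrixXd K = Eigen::kroneckerProduct(A, B);   // 4x4 block-structured result
  std::cout << K << std::endl;
  return 0;
}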
45 external/include/eigen3/unsupported/Eigen/LevenbergMarquardt vendored Normal file
							| @@ -0,0 +1,45 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library | ||||
| // for linear algebra. | ||||
| // | ||||
| // Copyright (C) 2009 Thomas Capricelli <orzel@freehackers.org> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_LEVENBERGMARQUARDT_MODULE | ||||
| #define EIGEN_LEVENBERGMARQUARDT_MODULE | ||||
|  | ||||
| // #include <vector> | ||||
|  | ||||
| #include <Eigen/Core> | ||||
| #include <Eigen/Jacobi> | ||||
| #include <Eigen/QR> | ||||
| #include <unsupported/Eigen/NumericalDiff>  | ||||
|  | ||||
| #include <Eigen/SparseQR> | ||||
|  | ||||
| /** | ||||
|   * \defgroup LevenbergMarquardt_Module Levenberg-Marquardt module | ||||
|   * | ||||
|   * \code | ||||
|   * #include <unsupported/Eigen/LevenbergMarquardt> | ||||
|   * \endcode | ||||
|   * | ||||
|   *  | ||||
|   */ | ||||
|  | ||||
| #include "Eigen/SparseCore" | ||||
| #ifndef EIGEN_PARSED_BY_DOXYGEN | ||||
|  | ||||
| #include "src/LevenbergMarquardt/LMqrsolv.h" | ||||
| #include "src/LevenbergMarquardt/LMcovar.h" | ||||
| #include "src/LevenbergMarquardt/LMpar.h" | ||||
|  | ||||
| #endif | ||||
|  | ||||
| #include "src/LevenbergMarquardt/LevenbergMarquardt.h" | ||||
| #include "src/LevenbergMarquardt/LMonestep.h" | ||||
|  | ||||
|  | ||||
| #endif // EIGEN_LEVENBERGMARQUARDT_MODULE | ||||
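Editor's note: a sketch of a small least-squares fit driven by this module's LevenbergMarquardt class. The DenseFunctor base and the minimize() entry point are assumed from src/LevenbergMarquardt/LevenbergMarquardt.h, which is included above but not shown in this diff.

#include <unsupported/Eigen/LevenbergMarquardt>
#include <iostream>

// Residuals r_i = a*t_i + b - y_i for a straight-line fit; the unknowns are (a, b).
struct LineFitFunctor : Eigen::DenseFunctor<double> {
  Eigen::VectorXd t, y;
  LineFitFunctor(const Eigen::VectorXd& t_, const Eigen::VectorXd& y_)
      : Eigen::DenseFunctor<double>(2, static_cast<int>(t_.size())), t(t_), y(y_) {}

  int operator()(const Eigen::VectorXd& x, Eigen::VectorXd& fvec) const {
    fvec = (x(0) * t.array() + x(1) - y.array()).matrix();
    return 0;
  }
  int df(const Eigen::VectorXd& /*x*/, Eigen::MatrixXd& fjac) const {
    fjac.col(0) = t;                                   // d r_i / d a
    fjac.col(1) = Eigen::VectorXd::Ones(t.size());     // d r_i / d b
    return 0;
  }
};

int main() {
  Eigen::VectorXd t(4), y(4);
  t << 0, 1, 2, 3;
  y << 1, 3, 5, 7;                                     // exactly y = 2*t + 1
  LineFitFunctor functor(t, y);
  Eigen::LevenbergMarquardt<LineFitFunctor> lm(functor);

  Eigen::VectorXd x(2);
  x << 0, 0;                                           // initial guess for (a, b)
  lm.minimize(x);
  std::cout << "a = " << x(0) << ", b = " << x(1) << std::endl;  // ~2, ~1
  return 0;
}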
209 external/include/eigen3/unsupported/Eigen/MPRealSupport vendored Normal file
							| @@ -0,0 +1,209 @@ | ||||
| // This file is part of a joint effort between Eigen, a lightweight C++ template library | ||||
| // for linear algebra, and MPFR C++, a C++ interface to MPFR library (http://www.holoborodko.com/pavel/) | ||||
| // | ||||
| // Copyright (C) 2010-2012 Pavel Holoborodko <pavel@holoborodko.com> | ||||
| // Copyright (C) 2010 Konstantin Holoborodko <konstantin@holoborodko.com> | ||||
| // Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr> | ||||
| // | ||||
| // This Source Code Form is subject to the terms of the Mozilla | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||
|  | ||||
| #ifndef EIGEN_MPREALSUPPORT_MODULE_H | ||||
| #define EIGEN_MPREALSUPPORT_MODULE_H | ||||
|  | ||||
| #include <Eigen/Core> | ||||
| #include <mpreal.h> | ||||
|  | ||||
| namespace Eigen { | ||||
|    | ||||
| /** | ||||
|   * \defgroup MPRealSupport_Module MPFRC++ Support module | ||||
|   * \code | ||||
|   * #include <Eigen/MPRealSupport> | ||||
|   * \endcode | ||||
|   * | ||||
|   * This module provides support for multi-precision floating-point numbers | ||||
|   * via the <a href="http://www.holoborodko.com/pavel/mpfr">MPFR C++</a> | ||||
|   * library, which itself is built upon <a href="http://www.mpfr.org/">MPFR</a>/<a href="http://gmplib.org/">GMP</a>. | ||||
|   * | ||||
|   * \warning MPFR C++ is licensed under the GPL. | ||||
|   * | ||||
|   * You can find a copy of MPFR C++ that is known to be compatible in the unsupported/test/mpreal folder. | ||||
|   * | ||||
|   * Here is an example: | ||||
|   * | ||||
| \code | ||||
| #include <iostream> | ||||
| #include <Eigen/MPRealSupport> | ||||
| #include <Eigen/LU> | ||||
| using namespace mpfr; | ||||
| using namespace Eigen; | ||||
| int main() | ||||
| { | ||||
|   // set precision to 256 bits (double has only 53 bits) | ||||
|   mpreal::set_default_prec(256); | ||||
|   // Declare matrix and vector types with multi-precision scalar type | ||||
|   typedef Matrix<mpreal,Dynamic,Dynamic>  MatrixXmp; | ||||
|   typedef Matrix<mpreal,Dynamic,1>        VectorXmp; | ||||
|  | ||||
|   MatrixXmp A = MatrixXmp::Random(100,100); | ||||
|   VectorXmp b = VectorXmp::Random(100); | ||||
|  | ||||
|   // Solve Ax=b using LU | ||||
|   VectorXmp x = A.lu().solve(b); | ||||
|   std::cout << "relative error: " << (A*x - b).norm() / b.norm() << std::endl; | ||||
|   return 0; | ||||
| } | ||||
| \endcode | ||||
|   * | ||||
|   */ | ||||
| 	 | ||||
|   template<> struct NumTraits<mpfr::mpreal> | ||||
|     : GenericNumTraits<mpfr::mpreal> | ||||
|   { | ||||
|     enum { | ||||
|       IsInteger = 0, | ||||
|       IsSigned = 1, | ||||
|       IsComplex = 0, | ||||
|       RequireInitialization = 1, | ||||
|       ReadCost = HugeCost, | ||||
|       AddCost  = HugeCost, | ||||
|       MulCost  = HugeCost | ||||
|     }; | ||||
|  | ||||
|     typedef mpfr::mpreal Real; | ||||
|     typedef mpfr::mpreal NonInteger; | ||||
|      | ||||
|     static inline Real highest  (long Precision = mpfr::mpreal::get_default_prec()) { return  mpfr::maxval(Precision); } | ||||
|     static inline Real lowest   (long Precision = mpfr::mpreal::get_default_prec()) { return -mpfr::maxval(Precision); } | ||||
|  | ||||
|     // Constants | ||||
|     static inline Real Pi      (long Precision = mpfr::mpreal::get_default_prec())  { return mpfr::const_pi(Precision);        } | ||||
|     static inline Real Euler   (long Precision = mpfr::mpreal::get_default_prec())  { return mpfr::const_euler(Precision);     } | ||||
|     static inline Real Log2    (long Precision = mpfr::mpreal::get_default_prec())  { return mpfr::const_log2(Precision);      } | ||||
|     static inline Real Catalan (long Precision = mpfr::mpreal::get_default_prec())  { return mpfr::const_catalan(Precision);   } | ||||
|  | ||||
|     static inline Real epsilon (long Precision = mpfr::mpreal::get_default_prec())  { return mpfr::machine_epsilon(Precision); } | ||||
|     static inline Real epsilon (const Real& x)                                      { return mpfr::machine_epsilon(x); } | ||||
|  | ||||
| #ifdef MPREAL_HAVE_DYNAMIC_STD_NUMERIC_LIMITS | ||||
|     static inline int digits10 (long Precision = mpfr::mpreal::get_default_prec())  { return std::numeric_limits<Real>::digits10(Precision); } | ||||
|     static inline int digits10 (const Real& x)                                      { return std::numeric_limits<Real>::digits10(x); } | ||||
| #endif | ||||
|  | ||||
|     static inline Real dummy_precision() | ||||
|     { | ||||
|       mpfr_prec_t weak_prec = ((mpfr::mpreal::get_default_prec()-1) * 90) / 100; | ||||
|       return mpfr::machine_epsilon(weak_prec); | ||||
|     } | ||||
|   }; | ||||
|  | ||||
|   namespace internal { | ||||
|  | ||||
|   template<> inline mpfr::mpreal random<mpfr::mpreal>() | ||||
|   { | ||||
|     return mpfr::random(); | ||||
|   } | ||||
|  | ||||
|   template<> inline mpfr::mpreal random<mpfr::mpreal>(const mpfr::mpreal& a, const mpfr::mpreal& b) | ||||
|   { | ||||
|     return a + (b-a) * random<mpfr::mpreal>(); | ||||
|   } | ||||
|  | ||||
|   inline bool isMuchSmallerThan(const mpfr::mpreal& a, const mpfr::mpreal& b, const mpfr::mpreal& eps) | ||||
|   { | ||||
|     return mpfr::abs(a) <= mpfr::abs(b) * eps; | ||||
|   } | ||||
|  | ||||
|   inline bool isApprox(const mpfr::mpreal& a, const mpfr::mpreal& b, const mpfr::mpreal& eps) | ||||
|   { | ||||
|     return mpfr::isEqualFuzzy(a,b,eps); | ||||
|   } | ||||
|  | ||||
|   inline bool isApproxOrLessThan(const mpfr::mpreal& a, const mpfr::mpreal& b, const mpfr::mpreal& eps) | ||||
|   { | ||||
|     return a <= b || mpfr::isEqualFuzzy(a,b,eps); | ||||
|   } | ||||
|  | ||||
|   template<> inline long double cast<mpfr::mpreal,long double>(const mpfr::mpreal& x) | ||||
|   { return x.toLDouble(); } | ||||
|  | ||||
|   template<> inline double cast<mpfr::mpreal,double>(const mpfr::mpreal& x) | ||||
|   { return x.toDouble(); } | ||||
|  | ||||
|   template<> inline long cast<mpfr::mpreal,long>(const mpfr::mpreal& x) | ||||
|   { return x.toLong(); } | ||||
|  | ||||
|   template<> inline int cast<mpfr::mpreal,int>(const mpfr::mpreal& x) | ||||
|   { return int(x.toLong()); } | ||||
|  | ||||
|   // Specialize GEBP kernel and traits for mpreal (no need for peeling, nor complicated stuff) | ||||
|   // This also permits calling mpfr's routines directly and avoids many temporaries produced by mpreal | ||||
|     template<> | ||||
|     class gebp_traits<mpfr::mpreal, mpfr::mpreal, false, false> | ||||
|     { | ||||
|     public: | ||||
|       typedef mpfr::mpreal ResScalar; | ||||
|       enum { | ||||
|         Vectorizable = false, | ||||
|         LhsPacketSize = 1, | ||||
|         RhsPacketSize = 1, | ||||
|         ResPacketSize = 1, | ||||
|         NumberOfRegisters = 1, | ||||
|         nr = 1, | ||||
|         mr = 1, | ||||
|         LhsProgress = 1, | ||||
|         RhsProgress = 1 | ||||
|       }; | ||||
|       typedef ResScalar LhsPacket; | ||||
|       typedef ResScalar RhsPacket; | ||||
|       typedef ResScalar ResPacket; | ||||
|        | ||||
|     }; | ||||
|  | ||||
|  | ||||
|  | ||||
|     template<typename Index, typename DataMapper, bool ConjugateLhs, bool ConjugateRhs> | ||||
|     struct gebp_kernel<mpfr::mpreal,mpfr::mpreal,Index,DataMapper,1,1,ConjugateLhs,ConjugateRhs> | ||||
|     { | ||||
|       typedef mpfr::mpreal mpreal; | ||||
|  | ||||
|       EIGEN_DONT_INLINE | ||||
|       void operator()(const DataMapper& res, const mpreal* blockA, const mpreal* blockB,  | ||||
|                       Index rows, Index depth, Index cols, const mpreal& alpha, | ||||
|                       Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0) | ||||
|       { | ||||
|         if(rows==0 || cols==0 || depth==0) | ||||
|           return; | ||||
|  | ||||
|         mpreal  acc1(0,mpfr_get_prec(blockA[0].mpfr_srcptr())), | ||||
|                 tmp (0,mpfr_get_prec(blockA[0].mpfr_srcptr())); | ||||
|  | ||||
|         if(strideA==-1) strideA = depth; | ||||
|         if(strideB==-1) strideB = depth; | ||||
|  | ||||
|         for(Index i=0; i<rows; ++i) | ||||
|         { | ||||
|           for(Index j=0; j<cols; ++j) | ||||
|           { | ||||
|             const mpreal *A = blockA + i*strideA + offsetA; | ||||
|             const mpreal *B = blockB + j*strideB + offsetB; | ||||
|              | ||||
|             acc1 = 0; | ||||
|             for(Index k=0; k<depth; k++) | ||||
|             { | ||||
|               mpfr_mul(tmp.mpfr_ptr(), A[k].mpfr_srcptr(), B[k].mpfr_srcptr(), mpreal::get_default_rnd()); | ||||
|               mpfr_add(acc1.mpfr_ptr(), acc1.mpfr_ptr(), tmp.mpfr_ptr(),  mpreal::get_default_rnd()); | ||||
|             } | ||||
|              | ||||
|             mpfr_mul(acc1.mpfr_ptr(), acc1.mpfr_srcptr(), alpha.mpfr_srcptr(), mpreal::get_default_rnd()); | ||||
|             mpfr_add(res(i,j).mpfr_ptr(), res(i,j).mpfr_srcptr(), acc1.mpfr_srcptr(),  mpreal::get_default_rnd()); | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|     }; | ||||
|   } // end namespace internal | ||||
| } | ||||
|  | ||||
| #endif // EIGEN_MPREALSUPPORT_MODULE_H | ||||
Some files were not shown because too many files have changed in this diff.