cart-elc

Source code for CART-ELC
git clone git://git.laack.co/cart-elc.git
Log | Files | Refs | README | LICENSE

TensorStriding.h (13513B)


      1 // This file is part of Eigen, a lightweight C++ template library
      2 // for linear algebra.
      3 //
      4 // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
      5 //
      6 // This Source Code Form is subject to the terms of the Mozilla
      7 // Public License v. 2.0. If a copy of the MPL was not distributed
      8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
      9 
     10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H
     11 #define EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H
     12 
     13 namespace Eigen {
     14 
     15 /** \class TensorStriding
     16   * \ingroup CXX11_Tensor_Module
     17   *
     18   * \brief Tensor striding class.
     19   *
     20   *
     21   */
     22 namespace internal {
     23 template<typename Strides, typename XprType>
     24 struct traits<TensorStridingOp<Strides, XprType> > : public traits<XprType>
     25 {
     26   typedef typename XprType::Scalar Scalar;
     27   typedef traits<XprType> XprTraits;
     28   typedef typename XprTraits::StorageKind StorageKind;
     29   typedef typename XprTraits::Index Index;
     30   typedef typename XprType::Nested Nested;
     31   typedef typename remove_reference<Nested>::type _Nested;
     32   static const int NumDimensions = XprTraits::NumDimensions;
     33   static const int Layout = XprTraits::Layout;
     34   typedef typename XprTraits::PointerType PointerType;
     35 };
     36 
     37 template<typename Strides, typename XprType>
     38 struct eval<TensorStridingOp<Strides, XprType>, Eigen::Dense>
     39 {
     40   typedef const TensorStridingOp<Strides, XprType>EIGEN_DEVICE_REF type;
     41 };
     42 
     43 template<typename Strides, typename XprType>
     44 struct nested<TensorStridingOp<Strides, XprType>, 1, typename eval<TensorStridingOp<Strides, XprType> >::type>
     45 {
     46   typedef TensorStridingOp<Strides, XprType> type;
     47 };
     48 
     49 }  // end namespace internal
     50 
     51 
     52 
     53 template<typename Strides, typename XprType>
     54 class TensorStridingOp : public TensorBase<TensorStridingOp<Strides, XprType> >
     55 {
     56   public:
     57     typedef TensorBase<TensorStridingOp<Strides, XprType> > Base;
     58     typedef typename Eigen::internal::traits<TensorStridingOp>::Scalar Scalar;
     59     typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
     60     typedef typename XprType::CoeffReturnType CoeffReturnType;
     61     typedef typename Eigen::internal::nested<TensorStridingOp>::type Nested;
     62     typedef typename Eigen::internal::traits<TensorStridingOp>::StorageKind StorageKind;
     63     typedef typename Eigen::internal::traits<TensorStridingOp>::Index Index;
     64 
     65     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingOp(const XprType& expr, const Strides& dims)
     66       : m_xpr(expr), m_dims(dims) {}
     67 
     68     EIGEN_DEVICE_FUNC
     69     const Strides& strides() const { return m_dims; }
     70 
     71     EIGEN_DEVICE_FUNC
     72     const typename internal::remove_all<typename XprType::Nested>::type&
     73     expression() const { return m_xpr; }
     74 
     75     EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorStridingOp)
     76 
     77   protected:
     78     typename XprType::Nested m_xpr;
     79     const Strides m_dims;
     80 };
     81 
     82 
     83 // Eval as rvalue
     84 template<typename Strides, typename ArgType, typename Device>
     85 struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
     86 {
     87   typedef TensorStridingOp<Strides, ArgType> XprType;
     88   typedef typename XprType::Index Index;
     89   static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
     90   typedef DSizes<Index, NumDims> Dimensions;
     91   typedef typename XprType::Scalar Scalar;
     92   typedef typename XprType::CoeffReturnType CoeffReturnType;
     93   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
     94   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
     95   typedef StorageMemory<CoeffReturnType, Device> Storage;
     96   typedef typename Storage::Type EvaluatorPointerType;
     97 
     98   enum {
     99     IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
    100     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    101     BlockAccess = false,
    102     PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
    103     Layout = TensorEvaluator<ArgType, Device>::Layout,
    104     CoordAccess = false,  // to be implemented
    105     RawAccess = false
    106   };
    107 
    108   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
    109   typedef internal::TensorBlockNotImplemented TensorBlock;
    110   //===--------------------------------------------------------------------===//
    111 
    112   EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    113       : m_impl(op.expression(), device)
    114   {
    115     m_dimensions = m_impl.dimensions();
    116     for (int i = 0; i < NumDims; ++i) {
    117       m_dimensions[i] =Eigen::numext::ceil(static_cast<float>(m_dimensions[i]) / op.strides()[i]);
    118     }
    119 
    120     const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    121     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
    122       m_outputStrides[0] = 1;
    123       m_inputStrides[0] = 1;
    124       for (int i = 1; i < NumDims; ++i) {
    125         m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
    126         m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
    127         m_inputStrides[i-1] *= op.strides()[i-1];
    128       }
    129       m_inputStrides[NumDims-1] *= op.strides()[NumDims-1];
    130     } else {  // RowMajor
    131       m_outputStrides[NumDims-1] = 1;
    132       m_inputStrides[NumDims-1] = 1;
    133       for (int i = NumDims - 2; i >= 0; --i) {
    134         m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1];
    135         m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
    136         m_inputStrides[i+1] *= op.strides()[i+1];
    137       }
    138       m_inputStrides[0] *= op.strides()[0];
    139     }
    140   }
    141 
    142 
    143   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
    144 
    145   EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType/*data*/) {
    146     m_impl.evalSubExprsIfNeeded(NULL);
    147     return true;
    148   }
    149   EIGEN_STRONG_INLINE void cleanup() {
    150     m_impl.cleanup();
    151   }
    152 
    153   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
    154   {
    155     return m_impl.coeff(srcCoeff(index));
    156   }
    157 
    158   template<int LoadMode>
    159   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
    160   {
    161     EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    162     eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
    163 
    164     Index inputIndices[] = {0, 0};
    165     Index indices[] = {index, index + PacketSize - 1};
    166     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
    167       EIGEN_UNROLL_LOOP
    168       for (int i = NumDims - 1; i > 0; --i) {
    169         const Index idx0 = indices[0] / m_outputStrides[i];
    170         const Index idx1 = indices[1] / m_outputStrides[i];
    171         inputIndices[0] += idx0 * m_inputStrides[i];
    172         inputIndices[1] += idx1 * m_inputStrides[i];
    173         indices[0] -= idx0 * m_outputStrides[i];
    174         indices[1] -= idx1 * m_outputStrides[i];
    175       }
    176       inputIndices[0] += indices[0] * m_inputStrides[0];
    177       inputIndices[1] += indices[1] * m_inputStrides[0];
    178     } else {  // RowMajor
    179       EIGEN_UNROLL_LOOP
    180       for (int i = 0; i < NumDims - 1; ++i) {
    181         const Index idx0 = indices[0] / m_outputStrides[i];
    182         const Index idx1 = indices[1] / m_outputStrides[i];
    183         inputIndices[0] += idx0 * m_inputStrides[i];
    184         inputIndices[1] += idx1 * m_inputStrides[i];
    185         indices[0] -= idx0 * m_outputStrides[i];
    186         indices[1] -= idx1 * m_outputStrides[i];
    187       }
    188       inputIndices[0] += indices[0] * m_inputStrides[NumDims-1];
    189       inputIndices[1] += indices[1] * m_inputStrides[NumDims-1];
    190     }
    191     if (inputIndices[1] - inputIndices[0] == PacketSize - 1) {
    192       PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
    193       return rslt;
    194     }
    195     else {
    196       EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
    197       values[0] = m_impl.coeff(inputIndices[0]);
    198       values[PacketSize-1] = m_impl.coeff(inputIndices[1]);
    199       EIGEN_UNROLL_LOOP
    200       for (int i = 1; i < PacketSize-1; ++i) {
    201         values[i] = coeff(index+i);
    202       }
    203       PacketReturnType rslt = internal::pload<PacketReturnType>(values);
    204       return rslt;
    205     }
    206   }
    207 
    208   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    209     double compute_cost = (NumDims - 1) * (TensorOpCost::AddCost<Index>() +
    210                                            TensorOpCost::MulCost<Index>() +
    211                                            TensorOpCost::DivCost<Index>()) +
    212         TensorOpCost::MulCost<Index>();
    213     if (vectorized) {
    214       compute_cost *= 2;  // packet() computes two indices
    215     }
    216     const int innerDim = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? 0 : (NumDims - 1);
    217     return m_impl.costPerCoeff(vectorized && m_inputStrides[innerDim] == 1) +
    218         // Computation is not vectorized per se, but it is done once per packet.
    219         TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
    220   }
    221 
    222   EIGEN_DEVICE_FUNC typename Storage::Type data() const { return NULL; }
    223 
    224 #ifdef EIGEN_USE_SYCL
    225   // binding placeholder accessors to a command group handler for SYCL
    226   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    227     m_impl.bind(cgh);
    228   }
    229 #endif
    230  protected:
    231   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
    232   {
    233     Index inputIndex = 0;
    234     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
    235       EIGEN_UNROLL_LOOP
    236       for (int i = NumDims - 1; i > 0; --i) {
    237         const Index idx = index / m_outputStrides[i];
    238         inputIndex += idx * m_inputStrides[i];
    239         index -= idx * m_outputStrides[i];
    240       }
    241       inputIndex += index * m_inputStrides[0];
    242     } else {  // RowMajor
    243       EIGEN_UNROLL_LOOP
    244       for (int i = 0; i < NumDims - 1; ++i) {
    245         const Index idx = index / m_outputStrides[i];
    246         inputIndex += idx * m_inputStrides[i];
    247         index -= idx * m_outputStrides[i];
    248       }
    249       inputIndex += index * m_inputStrides[NumDims-1];
    250     }
    251     return inputIndex;
    252   }
    253 
    254   Dimensions m_dimensions;
    255   array<Index, NumDims> m_outputStrides;
    256   array<Index, NumDims> m_inputStrides;
    257   TensorEvaluator<ArgType, Device> m_impl;
    258 };
    259 
    260 // Eval as lvalue
    261 template<typename Strides, typename ArgType, typename Device>
    262 struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device>
    263     : public TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
    264 {
    265   typedef TensorStridingOp<Strides, ArgType> XprType;
    266   typedef TensorEvaluator<const XprType, Device> Base;
    267   //  typedef typename XprType::Index Index;
    268   static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
    269   //  typedef DSizes<Index, NumDims> Dimensions;
    270 
    271   enum {
    272     IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
    273     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    274     PreferBlockAccess = false,
    275     Layout = TensorEvaluator<ArgType, Device>::Layout,
    276     CoordAccess = false,  // to be implemented
    277     RawAccess = false
    278   };
    279 
    280   EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    281       : Base(op, device) { }
    282 
    283   typedef typename XprType::Index Index;
    284   typedef typename XprType::Scalar Scalar;
    285   typedef typename XprType::CoeffReturnType CoeffReturnType;
    286   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
    287   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
    288 
    289   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
    290   {
    291     return this->m_impl.coeffRef(this->srcCoeff(index));
    292   }
    293 
    294   template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    295   void writePacket(Index index, const PacketReturnType& x)
    296   {
    297     EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    298     eigen_assert(index+PacketSize-1 < this->dimensions().TotalSize());
    299 
    300     Index inputIndices[] = {0, 0};
    301     Index indices[] = {index, index + PacketSize - 1};
    302     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
    303       EIGEN_UNROLL_LOOP
    304       for (int i = NumDims - 1; i > 0; --i) {
    305         const Index idx0 = indices[0] / this->m_outputStrides[i];
    306         const Index idx1 = indices[1] / this->m_outputStrides[i];
    307         inputIndices[0] += idx0 * this->m_inputStrides[i];
    308         inputIndices[1] += idx1 * this->m_inputStrides[i];
    309         indices[0] -= idx0 * this->m_outputStrides[i];
    310         indices[1] -= idx1 * this->m_outputStrides[i];
    311       }
    312       inputIndices[0] += indices[0] * this->m_inputStrides[0];
    313       inputIndices[1] += indices[1] * this->m_inputStrides[0];
    314     } else {  // RowMajor
    315       EIGEN_UNROLL_LOOP
    316       for (int i = 0; i < NumDims - 1; ++i) {
    317         const Index idx0 = indices[0] / this->m_outputStrides[i];
    318         const Index idx1 = indices[1] / this->m_outputStrides[i];
    319         inputIndices[0] += idx0 * this->m_inputStrides[i];
    320         inputIndices[1] += idx1 * this->m_inputStrides[i];
    321         indices[0] -= idx0 * this->m_outputStrides[i];
    322         indices[1] -= idx1 * this->m_outputStrides[i];
    323       }
    324       inputIndices[0] += indices[0] * this->m_inputStrides[NumDims-1];
    325       inputIndices[1] += indices[1] * this->m_inputStrides[NumDims-1];
    326     }
    327     if (inputIndices[1] - inputIndices[0] == PacketSize - 1) {
    328       this->m_impl.template writePacket<Unaligned>(inputIndices[0], x);
    329     }
    330     else {
    331       EIGEN_ALIGN_MAX Scalar values[PacketSize];
    332       internal::pstore<Scalar, PacketReturnType>(values, x);
    333       this->m_impl.coeffRef(inputIndices[0]) = values[0];
    334       this->m_impl.coeffRef(inputIndices[1]) = values[PacketSize-1];
    335       EIGEN_UNROLL_LOOP
    336       for (int i = 1; i < PacketSize-1; ++i) {
    337         this->coeffRef(index+i) = values[i];
    338       }
    339     }
    340   }
    341 };
    342 
    343 
    344 } // end namespace Eigen
    345 
    346 #endif // EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H