cart-elc

Source code for CART-ELC
git clone git://git.laack.co/cart-elc.git
Log | Files | Refs | README | LICENSE

TensorReverse.h (16938B)


      1 // This file is part of Eigen, a lightweight C++ template library
      2 // for linear algebra.
      3 //
      4 // Copyright (C) 2014 Navdeep Jaitly <ndjaitly@google.com>
      5 //                    Benoit Steiner <benoit.steiner.goog@gmail.com>
      6 //
      7 // This Source Code Form is subject to the terms of the Mozilla
      8 // Public License v. 2.0. If a copy of the MPL was not distributed
      9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
     10 
     11 #ifndef EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H
     12 #define EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H
     13 namespace Eigen {
     14 
     15 /** \class TensorReverse
     16   * \ingroup CXX11_Tensor_Module
     17   *
     18   * \brief Tensor reverse elements class.
     19   *
     20   */
     21 namespace internal {
     22 template<typename ReverseDimensions, typename XprType>
     23 struct traits<TensorReverseOp<ReverseDimensions,
     24                               XprType> > : public traits<XprType>
     25 {
     26   typedef typename XprType::Scalar Scalar;
     27   typedef traits<XprType> XprTraits;
     28   typedef typename XprTraits::StorageKind StorageKind;
     29   typedef typename XprTraits::Index Index;
     30   typedef typename XprType::Nested Nested;
     31   typedef typename remove_reference<Nested>::type _Nested;
     32   static const int NumDimensions = XprTraits::NumDimensions;
     33   static const int Layout = XprTraits::Layout;
     34   typedef typename XprTraits::PointerType PointerType;
     35 };
     36 
     37 template<typename ReverseDimensions, typename XprType>
     38 struct eval<TensorReverseOp<ReverseDimensions, XprType>, Eigen::Dense>
     39 {
     40   typedef const TensorReverseOp<ReverseDimensions, XprType>& type;
     41 };
     42 
     43 template<typename ReverseDimensions, typename XprType>
     44 struct nested<TensorReverseOp<ReverseDimensions, XprType>, 1,
     45             typename eval<TensorReverseOp<ReverseDimensions, XprType> >::type>
     46 {
     47   typedef TensorReverseOp<ReverseDimensions, XprType> type;
     48 };
     49 
     50 }  // end namespace internal
     51 
     52 template<typename ReverseDimensions, typename XprType>
     53 class TensorReverseOp : public TensorBase<TensorReverseOp<ReverseDimensions,
     54                                           XprType>, WriteAccessors>
     55 {
     56   public:
     57     typedef TensorBase<TensorReverseOp<ReverseDimensions, XprType>, WriteAccessors>Base;
     58     typedef typename Eigen::internal::traits<TensorReverseOp>::Scalar Scalar;
     59     typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
     60     typedef typename XprType::CoeffReturnType CoeffReturnType;
     61     typedef typename Eigen::internal::nested<TensorReverseOp>::type Nested;
     62     typedef typename Eigen::internal::traits<TensorReverseOp>::StorageKind
     63                                                                       StorageKind;
     64     typedef typename Eigen::internal::traits<TensorReverseOp>::Index Index;
     65 
     66     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReverseOp(
     67       const XprType& expr, const ReverseDimensions& reverse_dims)
     68       : m_xpr(expr), m_reverse_dims(reverse_dims) { }
     69 
     70     EIGEN_DEVICE_FUNC
     71     const ReverseDimensions& reverse() const { return m_reverse_dims; }
     72 
     73     EIGEN_DEVICE_FUNC
     74     const typename internal::remove_all<typename XprType::Nested>::type&
     75     expression() const { return m_xpr; }
     76 
     77     EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorReverseOp)
     78 
     79 
     80   protected:
     81     typename XprType::Nested m_xpr;
     82     const ReverseDimensions m_reverse_dims;
     83 };
     84 
     85 // Eval as rvalue
     86 template<typename ReverseDimensions, typename ArgType, typename Device>
     87 struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device>
     88 {
     89   typedef TensorReverseOp<ReverseDimensions, ArgType> XprType;
     90   typedef typename XprType::Index Index;
     91   static const int NumDims = internal::array_size<ReverseDimensions>::value;
     92   typedef DSizes<Index, NumDims> Dimensions;
     93   typedef typename XprType::Scalar Scalar;
     94   typedef typename XprType::CoeffReturnType CoeffReturnType;
     95   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
     96   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
     97   typedef StorageMemory<CoeffReturnType, Device> Storage;
     98   typedef typename Storage::Type EvaluatorPointerType;
     99 
    100   enum {
    101     IsAligned         = false,
    102     PacketAccess      = TensorEvaluator<ArgType, Device>::PacketAccess,
    103     BlockAccess       = NumDims > 0,
    104     PreferBlockAccess = true,
    105     Layout            = TensorEvaluator<ArgType, Device>::Layout,
    106     CoordAccess       = false,  // to be implemented
    107     RawAccess         = false
    108   };
    109 
    110   typedef internal::TensorIntDivisor<Index> IndexDivisor;
    111 
    112   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
    113   typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
    114   typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
    115 
    116   typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
    117       ArgTensorBlock;
    118 
    119   typedef typename internal::TensorMaterializedBlock<CoeffReturnType, NumDims,
    120                                                      Layout, Index>
    121       TensorBlock;
    122   //===--------------------------------------------------------------------===//
    123 
    124   EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    125       : m_impl(op.expression(), device),
    126         m_reverse(op.reverse()),
    127         m_device(device)
    128   {
    129     // Reversing a scalar isn't supported yet. It would be a no-op anyway.
    130     EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
    131 
    132     // Compute strides
    133     m_dimensions = m_impl.dimensions();
    134     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
    135       m_strides[0] = 1;
    136       for (int i = 1; i < NumDims; ++i) {
    137         m_strides[i] = m_strides[i-1] * m_dimensions[i-1];
    138         if (m_strides[i] > 0) m_fastStrides[i] = IndexDivisor(m_strides[i]);
    139       }
    140     } else {
    141       m_strides[NumDims-1] = 1;
    142       for (int i = NumDims - 2; i >= 0; --i) {
    143         m_strides[i] = m_strides[i+1] * m_dimensions[i+1];
    144         if (m_strides[i] > 0) m_fastStrides[i] = IndexDivisor(m_strides[i]);
    145       }
    146     }
    147   }
    148 
    149   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    150   const Dimensions& dimensions() const { return m_dimensions; }
    151 
    152   EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    153     m_impl.evalSubExprsIfNeeded(NULL);
    154     return true;
    155   }
    156 
    157 #ifdef EIGEN_USE_THREADS
    158   template <typename EvalSubExprsCallback>
    159   EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
    160       EvaluatorPointerType, EvalSubExprsCallback done) {
    161     m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
    162   }
    163 #endif  // EIGEN_USE_THREADS
    164 
    165   EIGEN_STRONG_INLINE void cleanup() {
    166     m_impl.cleanup();
    167   }
    168 
    169   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index reverseIndex(
    170       Index index) const {
    171     eigen_assert(index < dimensions().TotalSize());
    172     Index inputIndex = 0;
    173     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
    174       EIGEN_UNROLL_LOOP
    175       for (int i = NumDims - 1; i > 0; --i) {
    176         Index idx = index / m_fastStrides[i];
    177         index -= idx * m_strides[i];
    178         if (m_reverse[i]) {
    179           idx = m_dimensions[i] - idx - 1;
    180         }
    181         inputIndex += idx * m_strides[i] ;
    182       }
    183       if (m_reverse[0]) {
    184         inputIndex += (m_dimensions[0] - index - 1);
    185       } else {
    186         inputIndex += index;
    187       }
    188     } else {
    189       EIGEN_UNROLL_LOOP
    190       for (int i = 0; i < NumDims - 1; ++i) {
    191         Index idx = index / m_fastStrides[i];
    192         index -= idx * m_strides[i];
    193         if (m_reverse[i]) {
    194           idx = m_dimensions[i] - idx - 1;
    195         }
    196         inputIndex += idx * m_strides[i] ;
    197       }
    198       if (m_reverse[NumDims-1]) {
    199         inputIndex += (m_dimensions[NumDims-1] - index - 1);
    200       } else {
    201         inputIndex += index;
    202       }
    203     }
    204     return inputIndex;
    205   }
    206 
    207   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(
    208       Index index) const  {
    209     return m_impl.coeff(reverseIndex(index));
    210   }
    211 
    212   template<int LoadMode>
    213   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    214   PacketReturnType packet(Index index) const
    215   {
    216     EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    217     eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
    218 
    219     // TODO(ndjaitly): write a better packing routine that uses
    220     // local structure.
    221     EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type
    222                                                             values[PacketSize];
    223     EIGEN_UNROLL_LOOP
    224     for (int i = 0; i < PacketSize; ++i) {
    225       values[i] = coeff(index+i);
    226     }
    227     PacketReturnType rslt = internal::pload<PacketReturnType>(values);
    228     return rslt;
    229   }
    230 
    231   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    232   internal::TensorBlockResourceRequirements getResourceRequirements() const {
    233     const size_t target_size = m_device.lastLevelCacheSize();
    234     // Block evaluation reads underlying memory in reverse order, and default
    235     // cost model does not properly catch this in bytes stored/loaded.
    236     return internal::TensorBlockResourceRequirements::skewed<Scalar>(
    237                target_size)
    238         .addCostPerCoeff({0, 0, 24});
    239   }
    240 
    241   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
    242   block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
    243           bool /*root_of_expr_ast*/ = false) const {
    244     // TODO(ezhulenev): If underlying tensor expression supports and prefers
    245     // block evaluation we must use it. Currently we use coeff and packet
    246     // access into the underlying tensor expression.
    247     // static const bool useBlockAccessForArgType =
    248     //     TensorEvaluator<ArgType, Device>::BlockAccess &&
    249     //     TensorEvaluator<ArgType, Device>::PreferBlockAccess;
    250 
    251     static const bool isColMajor =
    252         static_cast<int>(Layout) == static_cast<int>(ColMajor);
    253 
    254     static const Index inner_dim_idx = isColMajor ? 0 : NumDims - 1;
    255     const bool inner_dim_reversed = m_reverse[inner_dim_idx];
    256 
    257     // Offset in the output block.
    258     Index block_offset = 0;
    259 
    260     // Offset in the input Tensor.
    261     Index input_offset = reverseIndex(desc.offset());
    262 
    263     // Initialize output block iterator state. Dimension in this array are
    264     // always in inner_most -> outer_most order (col major layout).
    265     array<BlockIteratorState, NumDims> it;
    266     for (int i = 0; i < NumDims; ++i) {
    267       const int dim = isColMajor ? i : NumDims - 1 - i;
    268       it[i].size = desc.dimension(dim);
    269       it[i].count = 0;
    270       it[i].reverse = m_reverse[dim];
    271 
    272       it[i].block_stride =
    273           i == 0 ? 1 : (it[i - 1].size * it[i - 1].block_stride);
    274       it[i].block_span = it[i].block_stride * (it[i].size - 1);
    275 
    276       it[i].input_stride = m_strides[dim];
    277       it[i].input_span = it[i].input_stride * (it[i].size - 1);
    278 
    279       if (it[i].reverse) {
    280         it[i].input_stride = -1 * it[i].input_stride;
    281         it[i].input_span = -1 * it[i].input_span;
    282       }
    283     }
    284 
    285     // If multiple inner dimensions have the same reverse flag, check if we can
    286     // merge them into a single virtual inner dimension.
    287     int effective_inner_dim = 0;
    288     for (int i = 1; i < NumDims; ++i) {
    289       if (it[i].reverse != it[effective_inner_dim].reverse) break;
    290       if (it[i].block_stride != it[effective_inner_dim].size) break;
    291       if (it[i].block_stride != numext::abs(it[i].input_stride)) break;
    292 
    293       it[i].size = it[effective_inner_dim].size * it[i].size;
    294 
    295       it[i].block_stride = 1;
    296       it[i].input_stride = (inner_dim_reversed ? -1 : 1);
    297 
    298       it[i].block_span = it[i].block_stride * (it[i].size - 1);
    299       it[i].input_span = it[i].input_stride * (it[i].size - 1);
    300 
    301       effective_inner_dim = i;
    302     }
    303 
    304     eigen_assert(it[effective_inner_dim].block_stride == 1);
    305     eigen_assert(it[effective_inner_dim].input_stride ==
    306                  (inner_dim_reversed ? -1 : 1));
    307 
    308     const Index inner_dim_size = it[effective_inner_dim].size;
    309 
    310     // Prepare storage for the materialized reverse result.
    311     const typename TensorBlock::Storage block_storage =
    312         TensorBlock::prepareStorage(desc, scratch);
    313     CoeffReturnType* block_buffer = block_storage.data();
    314 
    315     while (it[NumDims - 1].count < it[NumDims - 1].size) {
    316       // Copy inner-most dimension data from reversed location in input.
    317       Index dst = block_offset;
    318       Index src = input_offset;
    319 
    320       // NOTE(ezhulenev): Adding vectorized path with internal::preverse showed
    321       // worse results in benchmarks than a simple coefficient loop.
    322       if (inner_dim_reversed) {
    323         for (Index i = 0; i < inner_dim_size; ++i) {
    324           block_buffer[dst] = m_impl.coeff(src);
    325           ++dst;
    326           --src;
    327         }
    328       } else {
    329         for (Index i = 0; i < inner_dim_size; ++i) {
    330           block_buffer[dst] = m_impl.coeff(src);
    331           ++dst;
    332           ++src;
    333         }
    334       }
    335 
    336       // For the 1d tensor we need to generate only one inner-most dimension.
    337       if ((NumDims - effective_inner_dim) == 1) break;
    338 
    339       // Update offset.
    340       for (Index i = effective_inner_dim + 1; i < NumDims; ++i) {
    341         if (++it[i].count < it[i].size) {
    342           block_offset += it[i].block_stride;
    343           input_offset += it[i].input_stride;
    344           break;
    345         }
    346         if (i != NumDims - 1) it[i].count = 0;
    347         block_offset -= it[i].block_span;
    348         input_offset -= it[i].input_span;
    349       }
    350     }
    351 
    352     return block_storage.AsTensorMaterializedBlock();
    353   }
    354 
    355   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    356     double compute_cost = NumDims * (2 * TensorOpCost::AddCost<Index>() +
    357                                      2 * TensorOpCost::MulCost<Index>() +
    358                                      TensorOpCost::DivCost<Index>());
    359     for (int i = 0; i < NumDims; ++i) {
    360       if (m_reverse[i]) {
    361         compute_cost += 2 * TensorOpCost::AddCost<Index>();
    362       }
    363     }
    364     return m_impl.costPerCoeff(vectorized) +
    365            TensorOpCost(0, 0, compute_cost, false /* vectorized */, PacketSize);
    366   }
    367 
    368   EIGEN_DEVICE_FUNC typename Storage::Type data() const { return NULL; }
    369 
    370 #ifdef EIGEN_USE_SYCL
    371   // binding placeholder accessors to a command group handler for SYCL
    372   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    373     m_impl.bind(cgh);
    374   }
    375 #endif
    376 
    377  protected:
    378   Dimensions m_dimensions;
    379   array<Index, NumDims> m_strides;
    380   array<IndexDivisor, NumDims> m_fastStrides;
    381   TensorEvaluator<ArgType, Device> m_impl;
    382   ReverseDimensions m_reverse;
    383   const Device EIGEN_DEVICE_REF m_device;
    384 
    385  private:
    386   struct BlockIteratorState {
    387     BlockIteratorState()
    388         : size(0),
    389           count(0),
    390           reverse(false),
    391           block_stride(0),
    392           block_span(0),
    393           input_stride(0),
    394           input_span(0) {}
    395 
    396     Index size;
    397     Index count;
    398     bool reverse;
    399     Index block_stride;
    400     Index block_span;
    401     Index input_stride;
    402     Index input_span;
    403   };
    404 };
    405 
    406 // Eval as lvalue
    407 
    408 template <typename ReverseDimensions, typename ArgType, typename Device>
    409 struct TensorEvaluator<TensorReverseOp<ReverseDimensions, ArgType>, Device>
    410     : public TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>,
    411                              Device> {
    412   typedef TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>,
    413                           Device> Base;
    414   typedef TensorReverseOp<ReverseDimensions, ArgType> XprType;
    415   typedef typename XprType::Index Index;
    416   static const int NumDims = internal::array_size<ReverseDimensions>::value;
    417   typedef DSizes<Index, NumDims> Dimensions;
    418 
    419   enum {
    420     IsAligned = false,
    421     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    422     BlockAccess = false,
    423     PreferBlockAccess = false,
    424     Layout = TensorEvaluator<ArgType, Device>::Layout,
    425     CoordAccess = false,  // to be implemented
    426     RawAccess = false
    427   };
    428   EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    429       : Base(op, device) {}
    430 
    431   typedef typename XprType::Scalar Scalar;
    432   typedef typename XprType::CoeffReturnType CoeffReturnType;
    433   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
    434   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
    435 
    436   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
    437   typedef internal::TensorBlockNotImplemented TensorBlock;
    438   //===--------------------------------------------------------------------===//
    439 
    440   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    441   const Dimensions& dimensions() const { return this->m_dimensions; }
    442 
    443   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) {
    444     return this->m_impl.coeffRef(this->reverseIndex(index));
    445   }
    446 
    447   template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    448   void writePacket(Index index, const PacketReturnType& x) {
    449     EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    450     eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
    451 
    452     // This code is pilfered from TensorMorphing.h
    453     EIGEN_ALIGN_MAX CoeffReturnType values[PacketSize];
    454     internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
    455     EIGEN_UNROLL_LOOP
    456     for (int i = 0; i < PacketSize; ++i) {
    457       this->coeffRef(index+i) = values[i];
    458     }
    459   }
    460 };
    461 
    462 
    463 }  // end namespace Eigen
    464 
    465 #endif // EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H