cart-elc

Source code for CART-ELC
git clone git://git.laack.co/cart-elc.git
Log | Files | Refs | README | LICENSE

TensorForcedEval.h (8782B)


      1 // This file is part of Eigen, a lightweight C++ template library
      2 // for linear algebra.
      3 //
      4 // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
      5 //
      6 // This Source Code Form is subject to the terms of the Mozilla
      7 // Public License v. 2.0. If a copy of the MPL was not distributed
      8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
      9 
     10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H
     11 #define EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H
     12 
     13 namespace Eigen {
     14 
     15 /** \class TensorForcedEval
     16   * \ingroup CXX11_Tensor_Module
     17   *
     18   * \brief Tensor forced evaluation class.
     19   *
     20   *
     21   */
     22 namespace internal {
     23 template<typename XprType>
     24 struct traits<TensorForcedEvalOp<XprType> >
     25 {
     26   // Type promotion to handle the case where the types of the lhs and the rhs are different.
     27   typedef typename XprType::Scalar Scalar;
     28   typedef traits<XprType> XprTraits;
     29   typedef typename traits<XprType>::StorageKind StorageKind;
     30   typedef typename traits<XprType>::Index Index;
     31   typedef typename XprType::Nested Nested;
     32   typedef typename remove_reference<Nested>::type _Nested;
     33   static const int NumDimensions = XprTraits::NumDimensions;
     34   static const int Layout = XprTraits::Layout;
     35   typedef typename XprTraits::PointerType PointerType;
     36 
     37   enum {
     38     Flags = 0
     39   };
     40 };
     41 
     42 template<typename XprType>
     43 struct eval<TensorForcedEvalOp<XprType>, Eigen::Dense>
     44 {
     45   typedef const TensorForcedEvalOp<XprType>& type;
     46 };
     47 
     48 template<typename XprType>
     49 struct nested<TensorForcedEvalOp<XprType>, 1, typename eval<TensorForcedEvalOp<XprType> >::type>
     50 {
     51   typedef TensorForcedEvalOp<XprType> type;
     52 };
     53 
     54 }  // end namespace internal
     55 
     56 
     57 
     58 template<typename XprType>
     59 class TensorForcedEvalOp : public TensorBase<TensorForcedEvalOp<XprType>, ReadOnlyAccessors>
     60 {
     61   public:
     62   typedef typename Eigen::internal::traits<TensorForcedEvalOp>::Scalar Scalar;
     63   typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
     64   typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
     65   typedef typename Eigen::internal::nested<TensorForcedEvalOp>::type Nested;
     66   typedef typename Eigen::internal::traits<TensorForcedEvalOp>::StorageKind StorageKind;
     67   typedef typename Eigen::internal::traits<TensorForcedEvalOp>::Index Index;
     68 
     69   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorForcedEvalOp(const XprType& expr)
     70       : m_xpr(expr) {}
     71 
     72     EIGEN_DEVICE_FUNC
     73     const typename internal::remove_all<typename XprType::Nested>::type&
     74     expression() const { return m_xpr; }
     75 
     76   protected:
     77     typename XprType::Nested m_xpr;
     78 };
     79 
     80 namespace internal {
     81 template <typename Device, typename CoeffReturnType>
     82 struct non_integral_type_placement_new{
     83   template <typename StorageType>
     84 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index numValues, StorageType m_buffer) {
     85    // Initialize non-trivially constructible types.
     86     if (!internal::is_arithmetic<CoeffReturnType>::value) {
     87       for (Index i = 0; i < numValues; ++i) new (m_buffer + i) CoeffReturnType();
     88     }
     89 }
     90 };
     91 
     92 // SYCL does not support non-integral types 
     93 // having new (m_buffer + i) CoeffReturnType() causes the following compiler error for SYCL Devices 
     94 // no matching function for call to 'operator new'
     95 template <typename CoeffReturnType>
     96 struct non_integral_type_placement_new<Eigen::SyclDevice, CoeffReturnType> {
     97   template <typename StorageType>
     98 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index, StorageType) {
     99 }
    100 };
    101 } // end namespace internal
    102 
    103 template<typename ArgType_, typename Device>
    104 struct TensorEvaluator<const TensorForcedEvalOp<ArgType_>, Device>
    105 {
    106   typedef const typename internal::remove_all<ArgType_>::type ArgType;
    107   typedef TensorForcedEvalOp<ArgType> XprType;
    108   typedef typename ArgType::Scalar Scalar;
    109   typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
    110   typedef typename XprType::Index Index;
    111   typedef typename XprType::CoeffReturnType CoeffReturnType;
    112   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
    113   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
    114   typedef typename Eigen::internal::traits<XprType>::PointerType TensorPointerType;
    115   typedef StorageMemory<CoeffReturnType, Device> Storage;
    116   typedef typename Storage::Type EvaluatorPointerType;
    117 
    118   enum {
    119     IsAligned         = true,
    120     PacketAccess      = (PacketType<CoeffReturnType, Device>::size > 1),
    121     BlockAccess       = internal::is_arithmetic<CoeffReturnType>::value,
    122     PreferBlockAccess = false,
    123     Layout            = TensorEvaluator<ArgType, Device>::Layout,
    124     RawAccess         = true
    125   };
    126 
    127   static const int NumDims = internal::traits<ArgType>::NumDimensions;
    128 
    129   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
    130   typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
    131   typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
    132 
    133   typedef typename internal::TensorMaterializedBlock<CoeffReturnType, NumDims,
    134                                                      Layout, Index>
    135       TensorBlock;
    136   //===--------------------------------------------------------------------===//
    137 
    138   TensorEvaluator(const XprType& op, const Device& device)
    139       : m_impl(op.expression(), device), m_op(op.expression()),
    140       m_device(device), m_buffer(NULL)
    141   { }
    142 
    143   EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); }
    144 
    145   EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    146     const Index numValues =  internal::array_prod(m_impl.dimensions());
    147     m_buffer = m_device.get((CoeffReturnType*)m_device.allocate_temp(numValues * sizeof(CoeffReturnType)));
    148 
    149    internal::non_integral_type_placement_new<Device, CoeffReturnType>()(numValues, m_buffer);
    150 
    151     typedef TensorEvalToOp< const typename internal::remove_const<ArgType>::type > EvalTo;
    152     EvalTo evalToTmp(m_device.get(m_buffer), m_op);
    153 
    154     internal::TensorExecutor<
    155         const EvalTo, typename internal::remove_const<Device>::type,
    156         /*Vectorizable=*/internal::IsVectorizable<Device, const ArgType>::value,
    157         /*Tiling=*/internal::IsTileable<Device, const ArgType>::value>::
    158         run(evalToTmp, m_device);
    159 
    160     return true;
    161   }
    162 
    163 #ifdef EIGEN_USE_THREADS
    164   template <typename EvalSubExprsCallback>
    165   EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
    166       EvaluatorPointerType, EvalSubExprsCallback done) {
    167     const Index numValues = internal::array_prod(m_impl.dimensions());
    168     m_buffer = m_device.get((CoeffReturnType*)m_device.allocate_temp(
    169         numValues * sizeof(CoeffReturnType)));
    170     typedef TensorEvalToOp<const typename internal::remove_const<ArgType>::type>
    171         EvalTo;
    172     EvalTo evalToTmp(m_device.get(m_buffer), m_op);
    173 
    174     auto on_done = std::bind([](EvalSubExprsCallback done_) { done_(true); },
    175                              std::move(done));
    176     internal::TensorAsyncExecutor<
    177         const EvalTo, typename internal::remove_const<Device>::type,
    178         decltype(on_done),
    179         /*Vectorizable=*/internal::IsVectorizable<Device, const ArgType>::value,
    180         /*Tiling=*/internal::IsTileable<Device, const ArgType>::value>::
    181         runAsync(evalToTmp, m_device, std::move(on_done));
    182   }
    183 #endif
    184 
    185   EIGEN_STRONG_INLINE void cleanup() {
    186     m_device.deallocate_temp(m_buffer);
    187     m_buffer = NULL;
    188   }
    189 
    190   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
    191   {
    192     return m_buffer[index];
    193   }
    194 
    195   template<int LoadMode>
    196   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
    197   {
    198     return internal::ploadt<PacketReturnType, LoadMode>(m_buffer + index);
    199   }
    200 
    201   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    202   internal::TensorBlockResourceRequirements getResourceRequirements() const {
    203     return internal::TensorBlockResourceRequirements::any();
    204   }
    205 
    206   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
    207   block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
    208           bool /*root_of_expr_ast*/ = false) const {
    209     assert(m_buffer != NULL);
    210     return TensorBlock::materialize(m_buffer, m_impl.dimensions(), desc, scratch);
    211   }
    212 
    213   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    214     return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
    215   }
    216 
    217   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    218   EvaluatorPointerType data() const { return m_buffer; }
    219 
    220 #ifdef EIGEN_USE_SYCL
    221   // binding placeholder accessors to a command group handler for SYCL
    222   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    223     m_buffer.bind(cgh);
    224     m_impl.bind(cgh);
    225   }
    226 #endif
    227  private:
    228   TensorEvaluator<ArgType, Device> m_impl;
    229   const ArgType m_op;
    230   const Device EIGEN_DEVICE_REF m_device;
    231   EvaluatorPointerType m_buffer;
    232 };
    233 
    234 
    235 } // end namespace Eigen
    236 
    237 #endif // EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H