cart-elc

Source code for CART-ELC
git clone git://git.laack.co/cart-elc.git
Log | Files | Refs | README | LICENSE

TensorEvaluator.h (40005B)


      1 // This file is part of Eigen, a lightweight C++ template library
      2 // for linear algebra.
      3 //
      4 // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
      5 //
      6 // This Source Code Form is subject to the terms of the Mozilla
      7 // Public License v. 2.0. If a copy of the MPL was not distributed
      8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
      9 
     10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H
     11 #define EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H
     12 
     13 namespace Eigen {
     14 
     15 /** \class TensorEvaluator
     16   * \ingroup CXX11_Tensor_Module
     17   *
     18   * \brief The tensor evaluator classes.
     19   *
     20   * These classes are responsible for the evaluation of the tensor expression.
     21   *
     22   * TODO: add support for more types of expressions, in particular expressions
     23   * leading to lvalues (slicing, reshaping, etc...)
     24   */
     25 
     26 // Generic evaluator
     27 template<typename Derived, typename Device>
     28 struct TensorEvaluator
     29 {
     30   typedef typename Derived::Index Index;
     31   typedef typename Derived::Scalar Scalar;
     32   typedef typename Derived::Scalar CoeffReturnType;
     33   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
     34   typedef typename Derived::Dimensions Dimensions;
     35   typedef Derived XprType;
     36   static const int PacketSize =  PacketType<CoeffReturnType, Device>::size;
     37   typedef typename internal::traits<Derived>::template MakePointer<Scalar>::Type TensorPointerType;
     38   typedef StorageMemory<Scalar, Device> Storage;
     39   typedef typename Storage::Type EvaluatorPointerType;
     40 
     41   // NumDimensions is -1 for variable dim tensors
     42   static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
     43                                internal::traits<Derived>::NumDimensions : 0;
     44 
     45   enum {
     46     IsAligned          = Derived::IsAligned,
     47     PacketAccess       = (PacketType<CoeffReturnType, Device>::size > 1),
     48     BlockAccess        = internal::is_arithmetic<typename internal::remove_const<Scalar>::type>::value,
     49     PreferBlockAccess  = false,
     50     Layout             = Derived::Layout,
     51     CoordAccess        = NumCoords > 0,
     52     RawAccess          = true
     53   };
     54 
     55   typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
     56 
     57   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
     58   typedef internal::TensorBlockDescriptor<NumCoords, Index> TensorBlockDesc;
     59   typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
     60 
     61   typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumCoords,
     62                                                      Layout, Index>
     63       TensorBlock;
     64   //===--------------------------------------------------------------------===//
     65 
     66   EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
     67       : m_data(device.get((const_cast<TensorPointerType>(m.data())))),
     68         m_dims(m.dimensions()),
     69         m_device(device)
     70   { }
     71 
     72 
     73   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
     74 
     75   EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType dest) {
     76     if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && dest) {
     77       m_device.memcpy((void*)(m_device.get(dest)), m_device.get(m_data), m_dims.TotalSize() * sizeof(Scalar));
     78       return false;
     79     }
     80     return true;
     81   }
     82 
     83 #ifdef EIGEN_USE_THREADS
     84   template <typename EvalSubExprsCallback>
     85   EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
     86       EvaluatorPointerType dest, EvalSubExprsCallback done) {
     87     // TODO(ezhulenev): ThreadPoolDevice memcpy is blockign operation.
     88     done(evalSubExprsIfNeeded(dest));
     89   }
     90 #endif  // EIGEN_USE_THREADS
     91 
     92   EIGEN_STRONG_INLINE void cleanup() {}
     93 
     94   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
     95     eigen_assert(m_data != NULL);
     96     return m_data[index];
     97   }
     98 
     99   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) {
    100     eigen_assert(m_data != NULL);
    101     return m_data[index];
    102   }
    103 
    104   template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    105   PacketReturnType packet(Index index) const
    106   {
    107     return internal::ploadt<PacketReturnType, LoadMode>(m_data + index);
    108   }
    109 
    110   // Return a packet starting at `index` where `umask` specifies which elements
    111   // have to be loaded. Type/size of mask depends on PacketReturnType, e.g. for
    112   // Packet16f, `umask` is of type uint16_t and if a bit is 1, corresponding
    113   // float element will be loaded, otherwise 0 will be loaded.
    114   // Function has been templatized to enable Sfinae.
    115   template <typename PacketReturnTypeT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    116   typename internal::enable_if<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>::type
    117   partialPacket(Index index, typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask) const
    118   {
    119     return internal::ploadu<PacketReturnTypeT>(m_data + index, umask);
    120   }
    121 
    122   template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    123   void writePacket(Index index, const PacketReturnType& x)
    124   {
    125     return internal::pstoret<Scalar, PacketReturnType, StoreMode>(m_data + index, x);
    126   }
    127 
    128   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
    129     eigen_assert(m_data != NULL);
    130     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
    131       return m_data[m_dims.IndexOfColMajor(coords)];
    132     } else {
    133       return m_data[m_dims.IndexOfRowMajor(coords)];
    134     }
    135   }
    136 
    137   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType&
    138   coeffRef(const array<DenseIndex, NumCoords>& coords) {
    139     eigen_assert(m_data != NULL);
    140     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
    141       return m_data[m_dims.IndexOfColMajor(coords)];
    142     } else {
    143       return m_data[m_dims.IndexOfRowMajor(coords)];
    144     }
    145   }
    146 
    147   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    148     return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
    149                         PacketType<CoeffReturnType, Device>::size);
    150   }
    151 
    152   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    153   internal::TensorBlockResourceRequirements getResourceRequirements() const {
    154     return internal::TensorBlockResourceRequirements::any();
    155   }
    156 
    157   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
    158   block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
    159           bool /*root_of_expr_ast*/ = false) const {
    160     assert(m_data != NULL);
    161     return TensorBlock::materialize(m_data, m_dims, desc, scratch);
    162   }
    163 
    164   template<typename TensorBlock>
    165   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
    166       const TensorBlockDesc& desc, const TensorBlock& block) {
    167     assert(m_data != NULL);
    168 
    169     typedef typename TensorBlock::XprType TensorBlockExpr;
    170     typedef internal::TensorBlockAssignment<Scalar, NumCoords, TensorBlockExpr,
    171                                             Index>
    172         TensorBlockAssign;
    173 
    174     TensorBlockAssign::Run(
    175         TensorBlockAssign::target(desc.dimensions(),
    176                                   internal::strides<Layout>(m_dims), m_data,
    177                                   desc.offset()),
    178         block.expr());
    179   }
    180 
    181   EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }
    182 
    183 #ifdef EIGEN_USE_SYCL
    184   // binding placeholder accessors to a command group handler for SYCL
    185   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    186     m_data.bind(cgh);
    187   }
    188 #endif
    189  protected:
    190   EvaluatorPointerType m_data;
    191   Dimensions m_dims;
    192   const Device EIGEN_DEVICE_REF m_device;
    193 };
    194 
    195 namespace {
    196 template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
    197 T loadConstant(const T* address) {
    198   return *address;
    199 }
    200 // Use the texture cache on CUDA devices whenever possible
    201 #if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
    202 template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
    203 float loadConstant(const float* address) {
    204   return __ldg(address);
    205 }
    206 template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
    207 double loadConstant(const double* address) {
    208   return __ldg(address);
    209 }
    210 template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
    211 Eigen::half loadConstant(const Eigen::half* address) {
    212   return Eigen::half(half_impl::raw_uint16_to_half(__ldg(&address->x)));
    213 }
    214 #endif
    215 #ifdef EIGEN_USE_SYCL
    216 // overload of load constant should be implemented here based on range access
    217 template <cl::sycl::access::mode AcMd, typename T>
    218 T &loadConstant(const Eigen::TensorSycl::internal::RangeAccess<AcMd, T> &address) {
    219   return *address;
    220 }
    221 #endif
    222 }
    223 
    224 
    225 // Default evaluator for rvalues
    226 template<typename Derived, typename Device>
    227 struct TensorEvaluator<const Derived, Device>
    228 {
    229   typedef typename Derived::Index Index;
    230   typedef typename Derived::Scalar Scalar;
    231   typedef typename Derived::Scalar CoeffReturnType;
    232   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
    233   typedef typename Derived::Dimensions Dimensions;
    234   typedef const Derived XprType;
    235   typedef typename internal::traits<Derived>::template MakePointer<const Scalar>::Type TensorPointerType;
    236   typedef StorageMemory<const Scalar, Device> Storage;
    237   typedef typename Storage::Type EvaluatorPointerType;
    238 
    239   typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
    240 
    241   // NumDimensions is -1 for variable dim tensors
    242   static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
    243                                internal::traits<Derived>::NumDimensions : 0;
    244   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
    245 
    246   enum {
    247     IsAligned         = Derived::IsAligned,
    248     PacketAccess      = (PacketType<CoeffReturnType, Device>::size > 1),
    249     BlockAccess       = internal::is_arithmetic<ScalarNoConst>::value,
    250     PreferBlockAccess = false,
    251     Layout            = Derived::Layout,
    252     CoordAccess       = NumCoords > 0,
    253     RawAccess         = true
    254   };
    255 
    256   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
    257   typedef internal::TensorBlockDescriptor<NumCoords, Index> TensorBlockDesc;
    258   typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
    259 
    260   typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumCoords,
    261                                                      Layout, Index>
    262       TensorBlock;
    263   //===--------------------------------------------------------------------===//
    264 
    265   EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
    266       : m_data(device.get(m.data())), m_dims(m.dimensions()), m_device(device)
    267   { }
    268 
    269   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
    270 
    271   EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
    272     if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data) {
    273       m_device.memcpy((void*)(m_device.get(data)),m_device.get(m_data), m_dims.TotalSize() * sizeof(Scalar));
    274       return false;
    275     }
    276     return true;
    277   }
    278 
    279 #ifdef EIGEN_USE_THREADS
    280   template <typename EvalSubExprsCallback>
    281   EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
    282       EvaluatorPointerType dest, EvalSubExprsCallback done) {
    283     // TODO(ezhulenev): ThreadPoolDevice memcpy is a blockign operation.
    284     done(evalSubExprsIfNeeded(dest));
    285   }
    286 #endif  // EIGEN_USE_THREADS
    287 
    288   EIGEN_STRONG_INLINE void cleanup() { }
    289 
    290   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    291     eigen_assert(m_data != NULL);
    292     return loadConstant(m_data+index);
    293   }
    294 
    295   template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    296   PacketReturnType packet(Index index) const
    297   {
    298     return internal::ploadt_ro<PacketReturnType, LoadMode>(m_data + index);
    299   }
    300 
    301   // Return a packet starting at `index` where `umask` specifies which elements
    302   // have to be loaded. Type/size of mask depends on PacketReturnType, e.g. for
    303   // Packet16f, `umask` is of type uint16_t and if a bit is 1, corresponding
    304   // float element will be loaded, otherwise 0 will be loaded.
    305   // Function has been templatized to enable Sfinae.
    306   template <typename PacketReturnTypeT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    307   typename internal::enable_if<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>::type
    308   partialPacket(Index index, typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask) const
    309   {
    310     return internal::ploadu<PacketReturnTypeT>(m_data + index, umask);
    311   }
    312 
    313   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
    314     eigen_assert(m_data != NULL);
    315     const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords)
    316                         : m_dims.IndexOfRowMajor(coords);
    317     return loadConstant(m_data+index);
    318   }
    319 
    320   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    321     return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
    322                         PacketType<CoeffReturnType, Device>::size);
    323   }
    324 
    325   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    326   internal::TensorBlockResourceRequirements getResourceRequirements() const {
    327     return internal::TensorBlockResourceRequirements::any();
    328   }
    329 
    330   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
    331   block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
    332           bool /*root_of_expr_ast*/ = false) const {
    333     assert(m_data != NULL);
    334     return TensorBlock::materialize(m_data, m_dims, desc, scratch);
    335   }
    336 
    337   EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }
    338 #ifdef EIGEN_USE_SYCL
    339   // binding placeholder accessors to a command group handler for SYCL
    340   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    341     m_data.bind(cgh);
    342   }
    343 #endif
    344  protected:
    345   EvaluatorPointerType m_data;
    346   Dimensions m_dims;
    347   const Device EIGEN_DEVICE_REF m_device;
    348 };
    349 
    350 
    351 
    352 
    353 // -------------------- CwiseNullaryOp --------------------
    354 
    355 template<typename NullaryOp, typename ArgType, typename Device>
    356 struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
    357 {
    358   typedef TensorCwiseNullaryOp<NullaryOp, ArgType> XprType;
    359 
    360   TensorEvaluator(const XprType& op, const Device& device)
    361       : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device), m_wrapper()
    362   { }
    363 
    364   typedef typename XprType::Index Index;
    365   typedef typename XprType::Scalar Scalar;
    366   typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
    367   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
    368   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
    369   typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
    370   typedef StorageMemory<CoeffReturnType, Device> Storage;
    371   typedef typename Storage::Type EvaluatorPointerType;
    372 
    373   enum {
    374     IsAligned = true,
    375     PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess
    376     #ifdef EIGEN_USE_SYCL
    377     &&  (PacketType<CoeffReturnType, Device>::size >1)
    378     #endif
    379     ,
    380     BlockAccess = false,
    381     PreferBlockAccess = false,
    382     Layout = TensorEvaluator<ArgType, Device>::Layout,
    383     CoordAccess = false,  // to be implemented
    384     RawAccess = false
    385   };
    386 
    387   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
    388   typedef internal::TensorBlockNotImplemented TensorBlock;
    389   //===--------------------------------------------------------------------===//
    390 
    391   EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }
    392 
    393   EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { return true; }
    394 
    395 #ifdef EIGEN_USE_THREADS
    396   template <typename EvalSubExprsCallback>
    397   EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
    398       EvaluatorPointerType, EvalSubExprsCallback done) {
    399     done(true);
    400   }
    401 #endif  // EIGEN_USE_THREADS
    402 
    403   EIGEN_STRONG_INLINE void cleanup() { }
    404 
    405   EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
    406   {
    407     return m_wrapper(m_functor, index);
    408   }
    409 
    410   template<int LoadMode>
    411   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
    412   {
    413     return m_wrapper.template packetOp<PacketReturnType, Index>(m_functor, index);
    414   }
    415 
    416   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
    417   costPerCoeff(bool vectorized) const {
    418     return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
    419                         PacketType<CoeffReturnType, Device>::size);
    420   }
    421 
    422   EIGEN_DEVICE_FUNC  EvaluatorPointerType data() const { return NULL; }
    423 
    424 #ifdef EIGEN_USE_SYCL
    425    // binding placeholder accessors to a command group handler for SYCL
    426   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    427     m_argImpl.bind(cgh);
    428   }
    429 #endif
    430 
    431  private:
    432   const NullaryOp m_functor;
    433   TensorEvaluator<ArgType, Device> m_argImpl;
    434   const internal::nullary_wrapper<CoeffReturnType,NullaryOp> m_wrapper;
    435 };
    436 
    437 
    438 
    439 // -------------------- CwiseUnaryOp --------------------
    440 
    441 template<typename UnaryOp, typename ArgType, typename Device>
    442 struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
    443 {
    444   typedef TensorCwiseUnaryOp<UnaryOp, ArgType> XprType;
    445 
    446   enum {
    447     IsAligned          = TensorEvaluator<ArgType, Device>::IsAligned,
    448     PacketAccess       = int(TensorEvaluator<ArgType, Device>::PacketAccess) &
    449                          int(internal::functor_traits<UnaryOp>::PacketAccess),
    450     BlockAccess        = TensorEvaluator<ArgType, Device>::BlockAccess,
    451     PreferBlockAccess  = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
    452     Layout             = TensorEvaluator<ArgType, Device>::Layout,
    453     CoordAccess        = false,  // to be implemented
    454     RawAccess          = false
    455   };
    456 
    457   TensorEvaluator(const XprType& op, const Device& device)
    458     : m_device(device),
    459       m_functor(op.functor()),
    460       m_argImpl(op.nestedExpression(), device)
    461   { }
    462 
    463   typedef typename XprType::Index Index;
    464   typedef typename XprType::Scalar Scalar;
    465   typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
    466   typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
    467   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
    468   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
    469   typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
    470   typedef StorageMemory<CoeffReturnType, Device> Storage;
    471   typedef typename Storage::Type EvaluatorPointerType;
    472   static const int NumDims = internal::array_size<Dimensions>::value;
    473 
    474   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
    475   typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
    476   typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
    477 
    478   typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
    479       ArgTensorBlock;
    480 
    481   typedef internal::TensorCwiseUnaryBlock<UnaryOp, ArgTensorBlock>
    482       TensorBlock;
    483   //===--------------------------------------------------------------------===//
    484 
    485   EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }
    486 
    487   EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    488     m_argImpl.evalSubExprsIfNeeded(NULL);
    489     return true;
    490   }
    491 
    492 #ifdef EIGEN_USE_THREADS
    493   template <typename EvalSubExprsCallback>
    494   EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
    495       EvaluatorPointerType, EvalSubExprsCallback done) {
    496     m_argImpl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
    497   }
    498 #endif  // EIGEN_USE_THREADS
    499 
    500   EIGEN_STRONG_INLINE void cleanup() {
    501     m_argImpl.cleanup();
    502   }
    503 
    504   EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
    505   {
    506     return m_functor(m_argImpl.coeff(index));
    507   }
    508 
    509   template<int LoadMode>
    510   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
    511   {
    512     return m_functor.packetOp(m_argImpl.template packet<LoadMode>(index));
    513   }
    514 
    515   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    516     const double functor_cost = internal::functor_traits<UnaryOp>::Cost;
    517     return m_argImpl.costPerCoeff(vectorized) +
    518         TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
    519   }
    520 
    521   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    522   internal::TensorBlockResourceRequirements getResourceRequirements() const {
    523     static const double functor_cost = internal::functor_traits<UnaryOp>::Cost;
    524     return m_argImpl.getResourceRequirements().addCostPerCoeff(
    525         {0, 0, functor_cost / PacketSize});
    526   }
    527 
    528   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
    529   block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
    530           bool /*root_of_expr_ast*/ = false) const {
    531     return TensorBlock(m_argImpl.block(desc, scratch), m_functor);
    532   }
    533 
    534   EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
    535 
    536 #ifdef EIGEN_USE_SYCL
    537   // binding placeholder accessors to a command group handler for SYCL
    538   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const{
    539     m_argImpl.bind(cgh);
    540   }
    541 #endif
    542 
    543 
    544  private:
    545   const Device EIGEN_DEVICE_REF m_device;
    546   const UnaryOp m_functor;
    547   TensorEvaluator<ArgType, Device> m_argImpl;
    548 };
    549 
    550 
    551 // -------------------- CwiseBinaryOp --------------------
    552 
    553 template<typename BinaryOp, typename LeftArgType, typename RightArgType, typename Device>
    554 struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType>, Device>
    555 {
    556   typedef TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> XprType;
    557 
    558   enum {
    559     IsAligned         = int(TensorEvaluator<LeftArgType, Device>::IsAligned) &
    560                         int(TensorEvaluator<RightArgType, Device>::IsAligned),
    561     PacketAccess      = int(TensorEvaluator<LeftArgType, Device>::PacketAccess) &
    562                         int(TensorEvaluator<RightArgType, Device>::PacketAccess) &
    563                         int(internal::functor_traits<BinaryOp>::PacketAccess),
    564     BlockAccess       = int(TensorEvaluator<LeftArgType, Device>::BlockAccess) &
    565                         int(TensorEvaluator<RightArgType, Device>::BlockAccess),
    566     PreferBlockAccess = int(TensorEvaluator<LeftArgType, Device>::PreferBlockAccess) |
    567                         int(TensorEvaluator<RightArgType, Device>::PreferBlockAccess),
    568     Layout            = TensorEvaluator<LeftArgType, Device>::Layout,
    569     CoordAccess       = false,  // to be implemented
    570     RawAccess         = false
    571   };
    572 
    573   TensorEvaluator(const XprType& op, const Device& device)
    574     : m_device(device),
    575       m_functor(op.functor()),
    576       m_leftImpl(op.lhsExpression(), device),
    577       m_rightImpl(op.rhsExpression(), device)
    578   {
    579     EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout) || internal::traits<XprType>::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
    580     eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions()));
    581   }
    582 
    583   typedef typename XprType::Index Index;
    584   typedef typename XprType::Scalar Scalar;
    585   typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
    586   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
    587   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
    588   typedef typename TensorEvaluator<LeftArgType, Device>::Dimensions Dimensions;
    589   typedef StorageMemory<CoeffReturnType, Device> Storage;
    590   typedef typename Storage::Type EvaluatorPointerType;
    591 
    592   static const int NumDims = internal::array_size<
    593       typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value;
    594 
    595   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
    596   typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
    597   typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
    598 
    599   typedef typename TensorEvaluator<const LeftArgType, Device>::TensorBlock
    600       LeftTensorBlock;
    601   typedef typename TensorEvaluator<const RightArgType, Device>::TensorBlock
    602       RightTensorBlock;
    603 
    604   typedef internal::TensorCwiseBinaryBlock<BinaryOp, LeftTensorBlock,
    605                                            RightTensorBlock>
    606       TensorBlock;
    607   //===--------------------------------------------------------------------===//
    608 
    609   EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
    610   {
    611     // TODO: use right impl instead if right impl dimensions are known at compile time.
    612     return m_leftImpl.dimensions();
    613   }
    614 
    615   EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    616     m_leftImpl.evalSubExprsIfNeeded(NULL);
    617     m_rightImpl.evalSubExprsIfNeeded(NULL);
    618     return true;
    619   }
    620 
    621 #ifdef EIGEN_USE_THREADS
    622   template <typename EvalSubExprsCallback>
    623   EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
    624       EvaluatorPointerType, EvalSubExprsCallback done) {
    625     // TODO(ezhulenev): Evaluate two expression in parallel?
    626     m_leftImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
    627       m_rightImpl.evalSubExprsIfNeededAsync(nullptr,
    628                                             [done](bool) { done(true); });
    629     });
    630   }
    631 #endif  // EIGEN_USE_THREADS
    632 
    633   EIGEN_STRONG_INLINE void cleanup() {
    634     m_leftImpl.cleanup();
    635     m_rightImpl.cleanup();
    636   }
    637 
    638   EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
    639   {
    640     return m_functor(m_leftImpl.coeff(index), m_rightImpl.coeff(index));
    641   }
    642   template<int LoadMode>
    643   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
    644   {
    645     return m_functor.packetOp(m_leftImpl.template packet<LoadMode>(index), m_rightImpl.template packet<LoadMode>(index));
    646   }
    647 
    648   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
    649   costPerCoeff(bool vectorized) const {
    650     const double functor_cost = internal::functor_traits<BinaryOp>::Cost;
    651     return m_leftImpl.costPerCoeff(vectorized) +
    652            m_rightImpl.costPerCoeff(vectorized) +
    653            TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
    654   }
    655 
    656   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    657   internal::TensorBlockResourceRequirements getResourceRequirements() const {
    658     static const double functor_cost = internal::functor_traits<BinaryOp>::Cost;
    659     return internal::TensorBlockResourceRequirements::merge(
    660                m_leftImpl.getResourceRequirements(),
    661                m_rightImpl.getResourceRequirements())
    662         .addCostPerCoeff({0, 0, functor_cost / PacketSize});
    663   }
    664 
    665   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
    666   block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
    667           bool /*root_of_expr_ast*/ = false) const {
    668     desc.DropDestinationBuffer();
    669     return TensorBlock(m_leftImpl.block(desc, scratch),
    670                          m_rightImpl.block(desc, scratch), m_functor);
    671   }
    672 
    673   EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
    674 
    675   #ifdef EIGEN_USE_SYCL
    676   // binding placeholder accessors to a command group handler for SYCL
    677   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    678     m_leftImpl.bind(cgh);
    679     m_rightImpl.bind(cgh);
    680   }
    681   #endif
    682  private:
    683   const Device EIGEN_DEVICE_REF m_device;
    684   const BinaryOp m_functor;
    685   TensorEvaluator<LeftArgType, Device> m_leftImpl;
    686   TensorEvaluator<RightArgType, Device> m_rightImpl;
    687 };
    688 
    689 // -------------------- CwiseTernaryOp --------------------
    690 
    691 template<typename TernaryOp, typename Arg1Type, typename Arg2Type, typename Arg3Type, typename Device>
    692 struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type>, Device>
    693 {
    694   typedef TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type> XprType;
    695 
    696   enum {
    697     IsAligned = TensorEvaluator<Arg1Type, Device>::IsAligned & TensorEvaluator<Arg2Type, Device>::IsAligned & TensorEvaluator<Arg3Type, Device>::IsAligned,
    698     PacketAccess      = TensorEvaluator<Arg1Type, Device>::PacketAccess &&
    699                         TensorEvaluator<Arg2Type, Device>::PacketAccess &&
    700                         TensorEvaluator<Arg3Type, Device>::PacketAccess &&
    701                         internal::functor_traits<TernaryOp>::PacketAccess,
    702     BlockAccess       = false,
    703     PreferBlockAccess = TensorEvaluator<Arg1Type, Device>::PreferBlockAccess ||
    704                         TensorEvaluator<Arg2Type, Device>::PreferBlockAccess ||
    705                         TensorEvaluator<Arg3Type, Device>::PreferBlockAccess,
    706     Layout            = TensorEvaluator<Arg1Type, Device>::Layout,
    707     CoordAccess       = false,  // to be implemented
    708     RawAccess         = false
    709   };
    710 
    711   TensorEvaluator(const XprType& op, const Device& device)
    712     : m_functor(op.functor()),
    713       m_arg1Impl(op.arg1Expression(), device),
    714       m_arg2Impl(op.arg2Expression(), device),
    715       m_arg3Impl(op.arg3Expression(), device)
    716   {
    717     EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<Arg1Type, Device>::Layout) == static_cast<int>(TensorEvaluator<Arg3Type, Device>::Layout) || internal::traits<XprType>::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
    718 
    719     EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
    720                          typename internal::traits<Arg2Type>::StorageKind>::value),
    721                         STORAGE_KIND_MUST_MATCH)
    722     EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
    723                          typename internal::traits<Arg3Type>::StorageKind>::value),
    724                         STORAGE_KIND_MUST_MATCH)
    725     EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
    726                          typename internal::traits<Arg2Type>::Index>::value),
    727                         STORAGE_INDEX_MUST_MATCH)
    728     EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
    729                          typename internal::traits<Arg3Type>::Index>::value),
    730                         STORAGE_INDEX_MUST_MATCH)
    731 
    732     eigen_assert(dimensions_match(m_arg1Impl.dimensions(), m_arg2Impl.dimensions()) && dimensions_match(m_arg1Impl.dimensions(), m_arg3Impl.dimensions()));
    733   }
    734 
    735   typedef typename XprType::Index Index;
    736   typedef typename XprType::Scalar Scalar;
    737   typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
    738   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
    739   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
    740   typedef typename TensorEvaluator<Arg1Type, Device>::Dimensions Dimensions;
    741   typedef StorageMemory<CoeffReturnType, Device> Storage;
    742   typedef typename Storage::Type EvaluatorPointerType;
    743 
    744   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
    745   typedef internal::TensorBlockNotImplemented TensorBlock;
    746   //===--------------------------------------------------------------------===//
    747 
    748   EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
    749   {
    750     // TODO: use arg2 or arg3 dimensions if they are known at compile time.
    751     return m_arg1Impl.dimensions();
    752   }
    753 
    754   EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    755     m_arg1Impl.evalSubExprsIfNeeded(NULL);
    756     m_arg2Impl.evalSubExprsIfNeeded(NULL);
    757     m_arg3Impl.evalSubExprsIfNeeded(NULL);
    758     return true;
    759   }
    760   EIGEN_STRONG_INLINE void cleanup() {
    761     m_arg1Impl.cleanup();
    762     m_arg2Impl.cleanup();
    763     m_arg3Impl.cleanup();
    764   }
    765 
    766   EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
    767   {
    768     return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index));
    769   }
    770   template<int LoadMode>
    771   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
    772   {
    773     return m_functor.packetOp(m_arg1Impl.template packet<LoadMode>(index),
    774                               m_arg2Impl.template packet<LoadMode>(index),
    775                               m_arg3Impl.template packet<LoadMode>(index));
    776   }
    777 
    778   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
    779   costPerCoeff(bool vectorized) const {
    780     const double functor_cost = internal::functor_traits<TernaryOp>::Cost;
    781     return m_arg1Impl.costPerCoeff(vectorized) +
    782            m_arg2Impl.costPerCoeff(vectorized) +
    783            m_arg3Impl.costPerCoeff(vectorized) +
    784            TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
    785   }
    786 
    787   EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
    788 
    789 #ifdef EIGEN_USE_SYCL
    790    // binding placeholder accessors to a command group handler for SYCL
    791   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    792     m_arg1Impl.bind(cgh);
    793     m_arg2Impl.bind(cgh);
    794     m_arg3Impl.bind(cgh);
    795   }
    796 #endif
    797 
    798  private:
    799   const TernaryOp m_functor;
    800   TensorEvaluator<Arg1Type, Device> m_arg1Impl;
    801   TensorEvaluator<Arg2Type, Device> m_arg2Impl;
    802   TensorEvaluator<Arg3Type, Device> m_arg3Impl;
    803 };
    804 
    805 
    806 // -------------------- SelectOp --------------------
    807 
    808 template<typename IfArgType, typename ThenArgType, typename ElseArgType, typename Device>
    809 struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>, Device>
    810 {
    811   typedef TensorSelectOp<IfArgType, ThenArgType, ElseArgType> XprType;
    812   typedef typename XprType::Scalar Scalar;
    813 
    814   enum {
    815     IsAligned         = TensorEvaluator<ThenArgType, Device>::IsAligned &
    816                         TensorEvaluator<ElseArgType, Device>::IsAligned,
    817     PacketAccess      = TensorEvaluator<ThenArgType, Device>::PacketAccess &
    818                         TensorEvaluator<ElseArgType, Device>::PacketAccess &
    819                         PacketType<Scalar, Device>::HasBlend,
    820     BlockAccess       = TensorEvaluator<IfArgType, Device>::BlockAccess &&
    821                         TensorEvaluator<ThenArgType, Device>::BlockAccess &&
    822                         TensorEvaluator<ElseArgType, Device>::BlockAccess,
    823     PreferBlockAccess = TensorEvaluator<IfArgType, Device>::PreferBlockAccess ||
    824                         TensorEvaluator<ThenArgType, Device>::PreferBlockAccess ||
    825                         TensorEvaluator<ElseArgType, Device>::PreferBlockAccess,
    826     Layout            = TensorEvaluator<IfArgType, Device>::Layout,
    827     CoordAccess       = false,  // to be implemented
    828     RawAccess         = false
    829   };
    830 
    831   TensorEvaluator(const XprType& op, const Device& device)
    832     : m_condImpl(op.ifExpression(), device),
    833       m_thenImpl(op.thenExpression(), device),
    834       m_elseImpl(op.elseExpression(), device)
    835   {
    836     EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<IfArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<ThenArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
    837     EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<IfArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<ElseArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
    838     eigen_assert(dimensions_match(m_condImpl.dimensions(), m_thenImpl.dimensions()));
    839     eigen_assert(dimensions_match(m_thenImpl.dimensions(), m_elseImpl.dimensions()));
    840   }
    841 
    842   typedef typename XprType::Index Index;
    843   typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
    844   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
    845   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
    846   typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions;
    847   typedef StorageMemory<CoeffReturnType, Device> Storage;
    848   typedef typename Storage::Type EvaluatorPointerType;
    849 
    850   static const int NumDims = internal::array_size<Dimensions>::value;
    851 
    852   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
    853     typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
    854   typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
    855 
    856   typedef typename TensorEvaluator<const IfArgType, Device>::TensorBlock
    857       IfArgTensorBlock;
    858   typedef typename TensorEvaluator<const ThenArgType, Device>::TensorBlock
    859       ThenArgTensorBlock;
    860   typedef typename TensorEvaluator<const ElseArgType, Device>::TensorBlock
    861       ElseArgTensorBlock;
    862 
    863   struct TensorSelectOpBlockFactory {
    864     template <typename IfArgXprType, typename ThenArgXprType, typename ElseArgXprType>
    865     struct XprType {
    866       typedef TensorSelectOp<const IfArgXprType, const ThenArgXprType, const ElseArgXprType> type;
    867     };
    868 
    869     template <typename IfArgXprType, typename ThenArgXprType, typename ElseArgXprType>
    870     typename XprType<IfArgXprType, ThenArgXprType, ElseArgXprType>::type expr(
    871         const IfArgXprType& if_expr, const ThenArgXprType& then_expr, const ElseArgXprType& else_expr) const {
    872       return typename XprType<IfArgXprType, ThenArgXprType, ElseArgXprType>::type(if_expr, then_expr, else_expr);
    873     }
    874   };
    875 
    876   typedef internal::TensorTernaryExprBlock<TensorSelectOpBlockFactory,
    877                                            IfArgTensorBlock, ThenArgTensorBlock,
    878                                            ElseArgTensorBlock>
    879       TensorBlock;
    880   //===--------------------------------------------------------------------===//
    881 
    882   EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
    883   {
    884     // TODO: use then or else impl instead if they happen to be known at compile time.
    885     return m_condImpl.dimensions();
    886   }
    887 
    888   EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    889     m_condImpl.evalSubExprsIfNeeded(NULL);
    890     m_thenImpl.evalSubExprsIfNeeded(NULL);
    891     m_elseImpl.evalSubExprsIfNeeded(NULL);
    892     return true;
    893   }
    894 
    895 #ifdef EIGEN_USE_THREADS
    896   template <typename EvalSubExprsCallback>
    897   EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
    898       EvaluatorPointerType, EvalSubExprsCallback done) {
    899     m_condImpl.evalSubExprsIfNeeded(nullptr, [this, done](bool) {
    900       m_thenImpl.evalSubExprsIfNeeded(nullptr, [this, done](bool) {
    901         m_elseImpl.evalSubExprsIfNeeded(nullptr, [done](bool) { done(true); });
    902       });
    903     });
    904   }
    905 #endif  // EIGEN_USE_THREADS
    906 
    907   EIGEN_STRONG_INLINE void cleanup() {
    908     m_condImpl.cleanup();
    909     m_thenImpl.cleanup();
    910     m_elseImpl.cleanup();
    911   }
    912 
    913   EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
    914   {
    915     return m_condImpl.coeff(index) ? m_thenImpl.coeff(index) : m_elseImpl.coeff(index);
    916   }
    917   template<int LoadMode>
    918   EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const
    919   {
    920      internal::Selector<PacketSize> select;
    921      EIGEN_UNROLL_LOOP
    922      for (Index i = 0; i < PacketSize; ++i) {
    923        select.select[i] = m_condImpl.coeff(index+i);
    924      }
    925      return internal::pblend(select,
    926                              m_thenImpl.template packet<LoadMode>(index),
    927                              m_elseImpl.template packet<LoadMode>(index));
    928 
    929   }
    930 
    931   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
    932   costPerCoeff(bool vectorized) const {
    933     return m_condImpl.costPerCoeff(vectorized) +
    934            m_thenImpl.costPerCoeff(vectorized)
    935         .cwiseMax(m_elseImpl.costPerCoeff(vectorized));
    936   }
    937 
    938   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    939   internal::TensorBlockResourceRequirements getResourceRequirements() const {
    940     auto then_req = m_thenImpl.getResourceRequirements();
    941     auto else_req = m_elseImpl.getResourceRequirements();
    942 
    943     auto merged_req =
    944         internal::TensorBlockResourceRequirements::merge(then_req, else_req);
    945     merged_req.cost_per_coeff =
    946         then_req.cost_per_coeff.cwiseMax(else_req.cost_per_coeff);
    947 
    948     return internal::TensorBlockResourceRequirements::merge(
    949         m_condImpl.getResourceRequirements(), merged_req);
    950   }
    951 
    952   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
    953   block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
    954           bool /*root_of_expr_ast*/ = false) const {
    955     // It's unsafe to pass destination buffer to underlying expressions, because
    956     // output might be aliased with one of the inputs.
    957     desc.DropDestinationBuffer();
    958 
    959     return TensorBlock(
    960         m_condImpl.block(desc, scratch), m_thenImpl.block(desc, scratch),
    961         m_elseImpl.block(desc, scratch), TensorSelectOpBlockFactory());
    962   }
    963 
    964   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return NULL; }
    965 
    966 #ifdef EIGEN_USE_SYCL
    967  // binding placeholder accessors to a command group handler for SYCL
    968   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    969     m_condImpl.bind(cgh);
    970     m_thenImpl.bind(cgh);
    971     m_elseImpl.bind(cgh);
    972   }
    973 #endif
    974  private:
    975   TensorEvaluator<IfArgType, Device> m_condImpl;
    976   TensorEvaluator<ThenArgType, Device> m_thenImpl;
    977   TensorEvaluator<ElseArgType, Device> m_elseImpl;
    978 };
    979 
    980 
    981 } // end namespace Eigen
    982 
    983 #endif // EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H