cart-elc

Source code for CART-ELC
git clone git://git.laack.co/cart-elc.git
Log | Files | Refs | README | LICENSE

TensorMorphing.h (43284B)


      1 // This file is part of Eigen, a lightweight C++ template library
      2 // for linear algebra.
      3 //
      4 // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
      5 //
      6 // This Source Code Form is subject to the terms of the Mozilla
      7 // Public License v. 2.0. If a copy of the MPL was not distributed
      8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
      9 
     10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
     11 #define EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
     12 
     13 namespace Eigen {
     14 
     15 /** \class TensorReshaping
     16   * \ingroup CXX11_Tensor_Module
     17   *
     18   * \brief Tensor reshaping class.
     19   *
     20   *
     21   */
     22 namespace internal {
     23 template<typename NewDimensions, typename XprType>
     24 struct traits<TensorReshapingOp<NewDimensions, XprType> > : public traits<XprType>
     25 {
     26   typedef typename XprType::Scalar Scalar;
     27   typedef traits<XprType> XprTraits;
     28   typedef typename XprTraits::StorageKind StorageKind;
     29   typedef typename XprTraits::Index Index;
     30   typedef typename XprType::Nested Nested;
     31   typedef typename remove_reference<Nested>::type _Nested;
     32   static const int NumDimensions = array_size<NewDimensions>::value;
     33   static const int Layout = XprTraits::Layout;
     34   typedef typename XprTraits::PointerType PointerType;
     35 };
     36 
     37 template<typename NewDimensions, typename XprType>
     38 struct eval<TensorReshapingOp<NewDimensions, XprType>, Eigen::Dense>
     39 {
     40   typedef const TensorReshapingOp<NewDimensions, XprType>EIGEN_DEVICE_REF type;
     41 };
     42 
     43 template<typename NewDimensions, typename XprType>
     44 struct nested<TensorReshapingOp<NewDimensions, XprType>, 1, typename eval<TensorReshapingOp<NewDimensions, XprType> >::type>
     45 {
     46   typedef TensorReshapingOp<NewDimensions, XprType> type;
     47 };
     48 
     49 }  // end namespace internal
     50 
     51 
     52 
     53 template<typename NewDimensions, typename XprType>
     54 class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors>
     55 {
     56   public:
     57   typedef TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors> Base;
     58   typedef typename Eigen::internal::traits<TensorReshapingOp>::Scalar Scalar;
     59   typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
     60   typedef typename Eigen::internal::nested<TensorReshapingOp>::type Nested;
     61   typedef typename Eigen::internal::traits<TensorReshapingOp>::StorageKind StorageKind;
     62   typedef typename Eigen::internal::traits<TensorReshapingOp>::Index Index;
     63 
     64   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReshapingOp(const XprType& expr, const NewDimensions& dims)
     65       : m_xpr(expr), m_dims(dims) {}
     66 
     67     EIGEN_DEVICE_FUNC
     68     const NewDimensions& dimensions() const { return m_dims; }
     69 
     70     EIGEN_DEVICE_FUNC
     71     const typename internal::remove_all<typename XprType::Nested>::type&
     72     expression() const { return m_xpr; }
     73 
     74     EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorReshapingOp)
     75 
     76   protected:
     77     typename XprType::Nested m_xpr;
     78     const NewDimensions m_dims;
     79 };
     80 
     81 
     82 // Eval as rvalue
     83 template<typename NewDimensions, typename ArgType, typename Device>
     84 struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
     85 {
     86   typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
     87   typedef NewDimensions Dimensions;
     88 
     89   typedef typename XprType::Index Index;
     90   typedef typename XprType::Scalar Scalar;
     91   typedef typename XprType::CoeffReturnType CoeffReturnType;
     92   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
     93   typedef StorageMemory<CoeffReturnType, Device> Storage;
     94   typedef typename Storage::Type EvaluatorPointerType;
     95   typedef StorageMemory<typename internal::remove_const<CoeffReturnType>::type, Device> ConstCastStorage;
     96 
     97   static const int NumOutputDims = internal::array_size<Dimensions>::value;
     98   static const int NumInputDims  = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
     99 
    100   enum ReshapingKind {
    101     // We do not use layout information to determine reshaping kind.
    102     // Depending on the layout `N` can be inner or outer dimension.
    103     OneByN = 0,  // expr.reshape(1, N)
    104     NByOne = 1,  // expr.reshape(N, 1)
    105     Runtime = 2  // Reshape dimensions are dynamic (specified at runtime).
    106   };
    107 
    108   // clang-format off
    109   static const ReshapingKind kind =
    110 #if defined(EIGEN_HAS_INDEX_LIST)
    111         (NumOutputDims == 2 && internal::index_statically_eq<NewDimensions>(/*index=*/0, /*value=*/1)) ? OneByN
    112       : (NumOutputDims == 2 && internal::index_statically_eq<NewDimensions>(/*index=*/1, /*value=*/1)) ? NByOne
    113       : Runtime;
    114 #else
    115         Runtime;
    116 #endif
    117   // clang-format on
    118 
    119   enum {
    120     IsAligned         = TensorEvaluator<ArgType, Device>::IsAligned,
    121     PacketAccess      = TensorEvaluator<ArgType, Device>::PacketAccess,
    122     // For trivial reshapes with raw access to underlying data we will provide
    123     // zero overhead block access.
    124     // TODO(ezhulenev): Consider adding block access without raw access?
    125     BlockAccess       = TensorEvaluator<ArgType, Device>::RawAccess &&
    126                         NumInputDims > 0 && NumOutputDims > 0,
    127     PreferBlockAccess = false,
    128     Layout            = TensorEvaluator<ArgType, Device>::Layout,
    129     CoordAccess       = false,  // to be implemented
    130     RawAccess         = TensorEvaluator<ArgType, Device>::RawAccess
    131   };
    132 
    133   typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
    134 
    135   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
    136   typedef internal::TensorBlockDescriptor<NumOutputDims, Index> TensorBlockDesc;
    137   typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
    138 
    139   typedef
    140       typename internal::TensorMaterializedBlock<ScalarNoConst, NumOutputDims,
    141                                                  Layout, Index>
    142           TensorBlock;
    143   //===--------------------------------------------------------------------===//
    144 
    145   EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    146       : m_impl(op.expression(), device), m_dimensions(op.dimensions())
    147   {
    148     // The total size of the reshaped tensor must be equal to the total size
    149     // of the input tensor.
    150     eigen_assert(internal::array_prod(m_impl.dimensions()) == internal::array_prod(op.dimensions()));
    151   }
    152 
    153   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
    154 
    155 #ifdef EIGEN_USE_THREADS
    156   template <typename EvalSubExprsCallback>
    157   EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
    158       EvaluatorPointerType data, EvalSubExprsCallback done) {
    159     m_impl.evalSubExprsIfNeededAsync(data, std::move(done));
    160   }
    161 #endif
    162 
    163   EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
    164     return m_impl.evalSubExprsIfNeeded(data);
    165   }
    166   EIGEN_STRONG_INLINE void cleanup() {
    167     m_impl.cleanup();
    168   }
    169 
    170   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
    171   {
    172     return m_impl.coeff(index);
    173   }
    174 
    175   template<int LoadMode>
    176   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
    177   {
    178     return m_impl.template packet<LoadMode>(index);
    179   }
    180 
    181   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    182     return m_impl.costPerCoeff(vectorized);
    183   }
    184 
    185   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    186   internal::TensorBlockResourceRequirements getResourceRequirements() const {
    187     return internal::TensorBlockResourceRequirements::any();
    188   }
    189 
    190   // required in block(OutputTensorBlock* output_block) const
    191   // For C++03 compatibility this must be defined outside the method
    192   struct BlockIteratorState {
    193     Index stride;
    194     Index span;
    195     Index size;
    196     Index count;
    197   };
    198 
    199   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
    200   block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
    201           bool /*root_of_expr_ast*/ = false) const {
    202     eigen_assert(m_impl.data() != NULL);
    203     eigen_assert((kind == Runtime) ||
    204                  (kind == OneByN && desc.dimensions()[0] == 1) ||
    205                  (kind == NByOne && desc.dimensions()[1] == 1));
    206 
    207     if (kind == OneByN || kind == NByOne) {
    208       // We can guarantee at compile time that block is just a contiguous slice
    209       // of the underlying expression memory buffer.
    210       return TensorBlock(internal::TensorBlockKind::kView,
    211                            m_impl.data() + desc.offset(), desc.dimensions());
    212     } else {
    213       // This will do additional runtime checks, and in the end it might be also
    214       // a view, or it might be a block materialized in the temporary buffer.
    215       return TensorBlock::materialize(m_impl.data(), m_dimensions, desc,
    216                                         scratch);
    217     }
    218   }
    219 
    220   EIGEN_DEVICE_FUNC typename Storage::Type data() const {
    221     return constCast(m_impl.data());
    222   }
    223 
    224   EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
    225 
    226   #ifdef EIGEN_USE_SYCL
    227   // binding placeholder accessors to a command group handler for SYCL
    228   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    229     m_impl.bind(cgh);
    230   }
    231   #endif
    232  protected:
    233   TensorEvaluator<ArgType, Device> m_impl;
    234   NewDimensions m_dimensions;
    235 };
    236 
    237 
    238 // Eval as lvalue
    239 template<typename NewDimensions, typename ArgType, typename Device>
    240   struct TensorEvaluator<TensorReshapingOp<NewDimensions, ArgType>, Device>
    241   : public TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
    242 
    243 {
    244   typedef TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> Base;
    245   typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
    246   typedef NewDimensions Dimensions;
    247 
    248   enum {
    249     IsAligned         = TensorEvaluator<ArgType, Device>::IsAligned,
    250     PacketAccess      = TensorEvaluator<ArgType, Device>::PacketAccess,
    251     BlockAccess       = TensorEvaluator<ArgType, Device>::RawAccess,
    252     PreferBlockAccess = false,
    253     Layout            = TensorEvaluator<ArgType, Device>::Layout,
    254     CoordAccess       = false,  // to be implemented
    255     RawAccess         = TensorEvaluator<ArgType, Device>::RawAccess
    256   };
    257 
    258   EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    259     : Base(op, device)
    260   { }
    261 
    262   typedef typename XprType::Index Index;
    263   typedef typename XprType::Scalar Scalar;
    264   typedef typename XprType::CoeffReturnType CoeffReturnType;
    265   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
    266 
    267   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
    268   typedef internal::TensorBlockDescriptor<TensorEvaluator::NumOutputDims, Index>
    269       TensorBlockDesc;
    270   //===--------------------------------------------------------------------===//
    271 
    272   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
    273   {
    274     return this->m_impl.coeffRef(index);
    275   }
    276 
    277   template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    278   void writePacket(Index index, const PacketReturnType& x)
    279   {
    280     this->m_impl.template writePacket<StoreMode>(index, x);
    281   }
    282 
    283   template <typename TensorBlock>
    284   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
    285       const TensorBlockDesc& desc, const TensorBlock& block) {
    286     assert(this->m_impl.data() != NULL);
    287 
    288     typedef typename TensorBlock::XprType TensorBlockExpr;
    289     typedef internal::TensorBlockAssignment<
    290         Scalar, TensorEvaluator::NumOutputDims, TensorBlockExpr, Index>
    291         TensorBlockAssign;
    292 
    293     TensorBlockAssign::Run(
    294         TensorBlockAssign::target(desc.dimensions(),
    295                                   internal::strides<Layout>(this->dimensions()),
    296                                   this->m_impl.data(), desc.offset()),
    297         block.expr());
    298   }
    299 };
    300 
    301 
    302 /** \class TensorSlicing
    303   * \ingroup CXX11_Tensor_Module
    304   *
    305   * \brief Tensor slicing class.
    306   *
    307   *
    308   */
    309 namespace internal {
    310 template<typename StartIndices, typename Sizes, typename XprType>
    311 struct traits<TensorSlicingOp<StartIndices, Sizes, XprType> > : public traits<XprType>
    312 {
    313   typedef typename XprType::Scalar Scalar;
    314   typedef traits<XprType> XprTraits;
    315   typedef typename XprTraits::StorageKind StorageKind;
    316   typedef typename XprTraits::Index Index;
    317   typedef typename XprType::Nested Nested;
    318   typedef typename remove_reference<Nested>::type _Nested;
    319   static const int NumDimensions = array_size<StartIndices>::value;
    320   static const int Layout = XprTraits::Layout;
    321   typedef typename XprTraits::PointerType PointerType;
    322 };
    323 
    324 template<typename StartIndices, typename Sizes, typename XprType>
    325 struct eval<TensorSlicingOp<StartIndices, Sizes, XprType>, Eigen::Dense>
    326 {
    327   typedef const TensorSlicingOp<StartIndices, Sizes, XprType>EIGEN_DEVICE_REF type;
    328 };
    329 
    330 template<typename StartIndices, typename Sizes, typename XprType>
    331 struct nested<TensorSlicingOp<StartIndices, Sizes, XprType>, 1, typename eval<TensorSlicingOp<StartIndices, Sizes, XprType> >::type>
    332 {
    333   typedef TensorSlicingOp<StartIndices, Sizes, XprType> type;
    334 };
    335 
    336 }  // end namespace internal
    337 
    338 
    339 
    340 template<typename StartIndices, typename Sizes, typename XprType>
    341 class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType> >
    342 {
    343   public:
    344   typedef TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType> > Base;
    345   typedef typename Eigen::internal::traits<TensorSlicingOp>::Scalar Scalar;
    346   typedef typename XprType::CoeffReturnType CoeffReturnType;
    347   typedef typename Eigen::internal::nested<TensorSlicingOp>::type Nested;
    348   typedef typename Eigen::internal::traits<TensorSlicingOp>::StorageKind StorageKind;
    349   typedef typename Eigen::internal::traits<TensorSlicingOp>::Index Index;
    350 
    351   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorSlicingOp(const XprType& expr, const StartIndices& indices, const Sizes& sizes)
    352       : m_xpr(expr), m_indices(indices), m_sizes(sizes) {}
    353 
    354     EIGEN_DEVICE_FUNC
    355     const StartIndices& startIndices() const { return m_indices; }
    356     EIGEN_DEVICE_FUNC
    357     const Sizes& sizes() const { return m_sizes; }
    358 
    359     EIGEN_DEVICE_FUNC
    360     const typename internal::remove_all<typename XprType::Nested>::type&
    361     expression() const { return m_xpr; }
    362 
    363     EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorSlicingOp)
    364 
    365   protected:
    366     typename XprType::Nested m_xpr;
    367     const StartIndices m_indices;
    368     const Sizes m_sizes;
    369 };
    370 
    371 
    372 // Fixme: figure out the exact threshold
    373 namespace {
    374 template <typename Index, typename Device, bool BlockAccess> struct MemcpyTriggerForSlicing {
    375   EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const Device& device) : threshold_(2 * device.numThreads()) { }
    376   EIGEN_DEVICE_FUNC bool operator ()(Index total, Index contiguous) const {
    377     const bool prefer_block_evaluation = BlockAccess && total > 32*1024;
    378     return !prefer_block_evaluation && contiguous > threshold_;
    379   }
    380 
    381  private:
    382   Index threshold_;
    383 };
    384 
    385 // It is very expensive to start the memcpy kernel on GPU: we therefore only
    386 // use it for large copies.
    387 #ifdef EIGEN_USE_GPU
    388 template <typename Index, bool BlockAccess> struct MemcpyTriggerForSlicing<Index, GpuDevice, BlockAccess>  {
    389   EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const GpuDevice&) { }
    390   EIGEN_DEVICE_FUNC bool operator ()(Index, Index contiguous) const { return contiguous > 4*1024*1024; }
    391 };
    392 #endif
    393 
    394 // It is very expensive to start the memcpy kernel on GPU: we therefore only
    395 // use it for large copies.
    396 #ifdef EIGEN_USE_SYCL
    397 template <typename Index, bool BlockAccess> struct MemcpyTriggerForSlicing<Index, Eigen::SyclDevice, BlockAccess>  {
    398   EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const SyclDevice&) { }
    399   EIGEN_DEVICE_FUNC bool operator ()(Index, Index contiguous) const { return contiguous > 4*1024*1024; }
    400 };
    401 #endif
    402 
    403 }
    404 
    405 // Eval as rvalue
    406 template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
    407 struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
    408 {
    409   typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
    410   static const int NumDims = internal::array_size<Sizes>::value;
    411 
    412   typedef typename XprType::Index Index;
    413   typedef typename XprType::Scalar Scalar;
    414   typedef typename XprType::CoeffReturnType CoeffReturnType;
    415   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
    416   typedef Sizes Dimensions;
    417   typedef StorageMemory<CoeffReturnType, Device> Storage;
    418   typedef StorageMemory<typename internal::remove_const<CoeffReturnType>::type, Device> ConstCastStorage;
    419   typedef typename Storage::Type EvaluatorPointerType;
    420 
    421   enum {
    422     // Alignment can't be guaranteed at compile time since it depends on the
    423     // slice offsets and sizes.
    424     IsAligned         = false,
    425     PacketAccess      = TensorEvaluator<ArgType, Device>::PacketAccess,
    426     BlockAccess       = TensorEvaluator<ArgType, Device>::BlockAccess &&
    427                         // FIXME: Temporary workaround for bug in slicing of bool tensors.
    428                         !internal::is_same<typename internal::remove_const<Scalar>::type, bool>::value,
    429     PreferBlockAccess = true,
    430     Layout            = TensorEvaluator<ArgType, Device>::Layout,
    431     CoordAccess       = false,
    432     RawAccess         = false
    433   };
    434 
    435   typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
    436 
    437   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
    438   typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
    439   typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
    440 
    441   // Tensor slicing does not change the block type.
    442   typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
    443       TensorBlock;
    444   //===--------------------------------------------------------------------===//
    445 
    446   EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    447       : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices())
    448   {
    449     m_is_identity = true;
    450     for (int i = 0; i < internal::array_size<Dimensions>::value; ++i) {
    451       eigen_assert(m_impl.dimensions()[i] >=
    452                    op.sizes()[i] + op.startIndices()[i]);
    453       if (m_impl.dimensions()[i] != op.sizes()[i] ||
    454           op.startIndices()[i] != 0) {
    455         m_is_identity = false;
    456       }
    457     }
    458 
    459     // No strides for scalars.
    460     if (NumDims == 0) return;
    461 
    462     const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    463     const Sizes& output_dims = op.sizes();
    464     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
    465       m_inputStrides[0] = 1;
    466       for (int i = 1; i < NumDims; ++i) {
    467         m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
    468       }
    469 
    470      // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed.
    471       m_outputStrides[0] = 1;
    472       for (int i = 1; i < NumDims; ++i) {
    473         m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
    474         m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
    475       }
    476     } else {
    477       m_inputStrides[NumDims-1] = 1;
    478       for (int i = NumDims - 2; i >= 0; --i) {
    479         m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
    480       }
    481 
    482      // Don't initialize m_fastOutputStrides[NumDims-1] since it won't ever be accessed.
    483       m_outputStrides[NumDims-1] = 1;
    484       for (int i = NumDims - 2; i >= 0; --i) {
    485         m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
    486         m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
    487       }
    488     }
    489   }
    490 
    491   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
    492 
    493   EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
    494     m_impl.evalSubExprsIfNeeded(NULL);
    495     if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization
    496         && data && m_impl.data()) {
    497       Index contiguous_values = 1;
    498       if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
    499         for (int i = 0; i < NumDims; ++i) {
    500           contiguous_values *= dimensions()[i];
    501           if (dimensions()[i] != m_impl.dimensions()[i]) {
    502             break;
    503           }
    504         }
    505       } else {
    506         for (int i = NumDims-1; i >= 0; --i) {
    507           contiguous_values *= dimensions()[i];
    508           if (dimensions()[i] != m_impl.dimensions()[i]) {
    509             break;
    510           }
    511         }
    512       }
    513       // Use memcpy if it's going to be faster than using the regular evaluation.
    514       const MemcpyTriggerForSlicing<Index, Device, BlockAccess> trigger(m_device);
    515       if (trigger(internal::array_prod(dimensions()), contiguous_values)) {
    516         EvaluatorPointerType src = (EvaluatorPointerType)m_impl.data();
    517         for (Index i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) {
    518           Index offset = srcCoeff(i);
    519           m_device.memcpy((void*)(m_device.get(data + i)), m_device.get(src+offset), contiguous_values * sizeof(Scalar));
    520         }
    521         return false;
    522       }
    523     }
    524     return true;
    525   }
    526 
    527 #ifdef EIGEN_USE_THREADS
    528   template <typename EvalSubExprsCallback>
    529   EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
    530       EvaluatorPointerType /*data*/, EvalSubExprsCallback done) {
    531     m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
    532   }
    533 #endif  // EIGEN_USE_THREADS
    534 
    535   EIGEN_STRONG_INLINE void cleanup() {
    536     m_impl.cleanup();
    537   }
    538 
    539   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
    540   {
    541     if (m_is_identity) {
    542       return m_impl.coeff(index);
    543     } else {
    544       return m_impl.coeff(srcCoeff(index));
    545     }
    546   }
    547 
    548   template<int LoadMode>
    549   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
    550   {
    551     const int packetSize = PacketType<CoeffReturnType, Device>::size;
    552     EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    553     eigen_assert(index+packetSize-1 < internal::array_prod(dimensions()));
    554 
    555     if (m_is_identity) {
    556       return m_impl.template packet<LoadMode>(index);
    557     }
    558 
    559     Index inputIndices[] = {0, 0};
    560     Index indices[] = {index, index + packetSize - 1};
    561     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
    562       EIGEN_UNROLL_LOOP
    563       for (int i = NumDims - 1; i > 0; --i) {
    564         const Index idx0 = indices[0] / m_fastOutputStrides[i];
    565         const Index idx1 = indices[1] / m_fastOutputStrides[i];
    566         inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
    567         inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
    568         indices[0] -= idx0 * m_outputStrides[i];
    569         indices[1] -= idx1 * m_outputStrides[i];
    570       }
    571       inputIndices[0] += (indices[0] + m_offsets[0]);
    572       inputIndices[1] += (indices[1] + m_offsets[0]);
    573     } else {
    574       EIGEN_UNROLL_LOOP
    575       for (int i = 0; i < NumDims - 1; ++i) {
    576         const Index idx0 = indices[0] / m_fastOutputStrides[i];
    577         const Index idx1 = indices[1] / m_fastOutputStrides[i];
    578         inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
    579         inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
    580         indices[0] -= idx0 * m_outputStrides[i];
    581         indices[1] -= idx1 * m_outputStrides[i];
    582       }
    583       inputIndices[0] += (indices[0] + m_offsets[NumDims-1]);
    584       inputIndices[1] += (indices[1] + m_offsets[NumDims-1]);
    585     }
    586     if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
    587       PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
    588       return rslt;
    589     }
    590     else {
    591       EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];
    592       values[0] = m_impl.coeff(inputIndices[0]);
    593       values[packetSize-1] = m_impl.coeff(inputIndices[1]);
    594       EIGEN_UNROLL_LOOP
    595       for (int i = 1; i < packetSize-1; ++i) {
    596         values[i] = coeff(index+i);
    597       }
    598       PacketReturnType rslt = internal::pload<PacketReturnType>(values);
    599       return rslt;
    600     }
    601   }
    602 
    603   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    604     return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, m_is_identity ? 1 : NumDims);
    605   }
    606 
    607   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    608   internal::TensorBlockResourceRequirements getResourceRequirements() const {
    609     const size_t target_size = m_device.lastLevelCacheSize();
    610     return internal::TensorBlockResourceRequirements::merge(
    611         internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
    612         m_impl.getResourceRequirements());
    613   }
    614 
    615   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
    616   block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
    617           bool /*root_of_expr_ast*/ = false) const {
    618     TensorBlockDesc arg_desc = desc.WithOffset(srcCoeff(desc.offset()));
    619     TensorBlock block = m_impl.block(arg_desc, scratch);
    620     if (!arg_desc.HasDestinationBuffer()) desc.DropDestinationBuffer();
    621     return block;
    622   }
    623 
    624   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const {
    625     typename Storage::Type result = constCast(m_impl.data());
    626     if (result) {
    627       Index offset = 0;
    628       if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
    629         for (int i = 0; i < NumDims; ++i) {
    630           if (m_dimensions[i] != m_impl.dimensions()[i]) {
    631             offset += m_offsets[i] * m_inputStrides[i];
    632             for (int j = i+1; j < NumDims; ++j) {
    633               if (m_dimensions[j] > 1) {
    634                 return NULL;
    635               }
    636               offset += m_offsets[j] * m_inputStrides[j];
    637             }
    638             break;
    639           }
    640         }
    641       } else {
    642         for (int i = NumDims - 1; i >= 0; --i) {
    643           if (m_dimensions[i] != m_impl.dimensions()[i]) {
    644             offset += m_offsets[i] * m_inputStrides[i];
    645             for (int j = i-1; j >= 0; --j) {
    646               if (m_dimensions[j] > 1) {
    647                 return NULL;
    648               }
    649               offset += m_offsets[j] * m_inputStrides[j];
    650             }
    651             break;
    652           }
    653         }
    654       }
    655       return result + offset;
    656     }
    657     return NULL;
    658   }
    659 #ifdef EIGEN_USE_SYCL
    660   // binding placeholder accessors to a command group handler for SYCL
    661   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    662     m_impl.bind(cgh);
    663   }
    664 #endif
    665 
    666  protected:
    667   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
    668   {
    669     Index inputIndex = 0;
    670     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
    671       EIGEN_UNROLL_LOOP
    672       for (int i = NumDims - 1; i > 0; --i) {
    673         const Index idx = index / m_fastOutputStrides[i];
    674         inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
    675         index -= idx * m_outputStrides[i];
    676       }
    677       inputIndex += (index + m_offsets[0]);
    678     } else {
    679       EIGEN_UNROLL_LOOP
    680       for (int i = 0; i < NumDims - 1; ++i) {
    681         const Index idx = index / m_fastOutputStrides[i];
    682         inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
    683         index -= idx * m_outputStrides[i];
    684       }
    685       inputIndex += (index + m_offsets[NumDims-1]);
    686     }
    687     return inputIndex;
    688   }
    689 
    690   array<Index, NumDims> m_outputStrides;
    691   array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
    692   array<Index, NumDims> m_inputStrides;
    693   TensorEvaluator<ArgType, Device> m_impl;
    694   const Device EIGEN_DEVICE_REF m_device;
    695   Dimensions m_dimensions;
    696   bool m_is_identity;
    697   const StartIndices m_offsets;
    698 };
    699 
    700 
    701 // Eval as lvalue
    702 template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
    703 struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
    704   : public TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
    705 {
    706   typedef TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> Base;
    707   typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
    708   static const int NumDims = internal::array_size<Sizes>::value;
    709 
    710   typedef typename XprType::Index Index;
    711   typedef typename XprType::Scalar Scalar;
    712   typedef typename XprType::CoeffReturnType CoeffReturnType;
    713   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
    714   typedef Sizes Dimensions;
    715 
    716   enum {
    717     IsAligned         = false,
    718     PacketAccess      = TensorEvaluator<ArgType, Device>::PacketAccess,
    719     BlockAccess       = TensorEvaluator<ArgType, Device>::BlockAccess,
    720     PreferBlockAccess = true,
    721     Layout            = TensorEvaluator<ArgType, Device>::Layout,
    722     CoordAccess       = false,
    723     RawAccess         = (NumDims == 1) & TensorEvaluator<ArgType, Device>::RawAccess
    724   };
    725 
    726   typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
    727 
    728   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
    729   typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
    730   typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
    731   //===--------------------------------------------------------------------===//
    732 
    733   EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    734     : Base(op, device)
    735     { }
    736 
    737   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
    738   {
    739     if (this->m_is_identity) {
    740       return this->m_impl.coeffRef(index);
    741     } else {
    742       return this->m_impl.coeffRef(this->srcCoeff(index));
    743     }
    744   }
    745 
    746   template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    747   void writePacket(Index index, const PacketReturnType& x)
    748   {
    749     if (this->m_is_identity) {
    750       this->m_impl.template writePacket<StoreMode>(index, x);
    751       return;
    752     }
    753 
    754     const int packetSize = PacketType<CoeffReturnType, Device>::size;
    755     Index inputIndices[] = {0, 0};
    756     Index indices[] = {index, index + packetSize - 1};
    757     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
    758       EIGEN_UNROLL_LOOP
    759       for (int i = NumDims - 1; i > 0; --i) {
    760         const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
    761         const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
    762         inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
    763         inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
    764         indices[0] -= idx0 * this->m_outputStrides[i];
    765         indices[1] -= idx1 * this->m_outputStrides[i];
    766       }
    767       inputIndices[0] += (indices[0] + this->m_offsets[0]);
    768       inputIndices[1] += (indices[1] + this->m_offsets[0]);
    769     } else {
    770       EIGEN_UNROLL_LOOP
    771       for (int i = 0; i < NumDims - 1; ++i) {
    772         const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
    773         const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
    774         inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
    775         inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
    776         indices[0] -= idx0 * this->m_outputStrides[i];
    777         indices[1] -= idx1 * this->m_outputStrides[i];
    778       }
    779       inputIndices[0] += (indices[0] + this->m_offsets[NumDims-1]);
    780       inputIndices[1] += (indices[1] + this->m_offsets[NumDims-1]);
    781     }
    782     if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
    783       this->m_impl.template writePacket<StoreMode>(inputIndices[0], x);
    784     }
    785     else {
    786       EIGEN_ALIGN_MAX CoeffReturnType values[packetSize];
    787       internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
    788       this->m_impl.coeffRef(inputIndices[0]) = values[0];
    789       this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1];
    790       EIGEN_UNROLL_LOOP
    791       for (int i = 1; i < packetSize-1; ++i) {
    792         this->coeffRef(index+i) = values[i];
    793       }
    794     }
    795   }
    796 
    797   template<typename TensorBlock>
    798   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
    799       const TensorBlockDesc& desc, const TensorBlock& block) {
    800     TensorBlockDesc arg_desc = desc.WithOffset(this->srcCoeff(desc.offset()));
    801     this->m_impl.writeBlock(arg_desc, block);
    802   }
    803 };
    804 
    805 namespace internal {
    806 template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
    807 struct traits<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> > : public traits<XprType>
    808 {
    809   typedef typename XprType::Scalar Scalar;
    810   typedef traits<XprType> XprTraits;
    811   typedef typename XprTraits::StorageKind StorageKind;
    812   typedef typename XprTraits::Index Index;
    813   typedef typename XprType::Nested Nested;
    814   typedef typename remove_reference<Nested>::type _Nested;
    815   static const int NumDimensions = array_size<StartIndices>::value;
    816   static const int Layout = XprTraits::Layout;
    817   typedef typename XprTraits::PointerType PointerType;
    818 };
    819 
    820 template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
    821 struct eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Eigen::Dense>
    822 {
    823   typedef const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>EIGEN_DEVICE_REF type;
    824 };
    825 
    826 template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
    827 struct nested<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, 1, typename eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >::type>
    828 {
    829   typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> type;
    830 };
    831 
    832 }  // end namespace internal
    833 
    834 
    835 template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
    836 class TensorStridingSlicingOp : public TensorBase<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >
    837 {
    838   public:
    839   typedef TensorBase<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> > Base;
    840   typedef typename internal::traits<TensorStridingSlicingOp>::Scalar Scalar;
    841   typedef typename XprType::CoeffReturnType CoeffReturnType;
    842   typedef typename internal::nested<TensorStridingSlicingOp>::type Nested;
    843   typedef typename internal::traits<TensorStridingSlicingOp>::StorageKind StorageKind;
    844   typedef typename internal::traits<TensorStridingSlicingOp>::Index Index;
    845 
    846   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingSlicingOp(
    847     const XprType& expr, const StartIndices& startIndices,
    848     const StopIndices& stopIndices, const Strides& strides)
    849       : m_xpr(expr), m_startIndices(startIndices), m_stopIndices(stopIndices),
    850         m_strides(strides) {}
    851 
    852     EIGEN_DEVICE_FUNC
    853     const StartIndices& startIndices() const { return m_startIndices; }
    854     EIGEN_DEVICE_FUNC
    855     const StartIndices& stopIndices() const { return m_stopIndices; }
    856     EIGEN_DEVICE_FUNC
    857     const StartIndices& strides() const { return m_strides; }
    858 
    859     EIGEN_DEVICE_FUNC
    860     const typename internal::remove_all<typename XprType::Nested>::type&
    861     expression() const { return m_xpr; }
    862 
    863     EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorStridingSlicingOp)
    864 
    865   protected:
    866     typename XprType::Nested m_xpr;
    867     const StartIndices m_startIndices;
    868     const StopIndices m_stopIndices;
    869     const Strides m_strides;
    870 };
    871 
    872 // Eval as rvalue
    873 template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
    874 struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
    875 {
    876   typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
    877   static const int NumDims = internal::array_size<Strides>::value;
    878   typedef typename XprType::Index Index;
    879   typedef typename XprType::Scalar Scalar;
    880   typedef typename XprType::CoeffReturnType CoeffReturnType;
    881   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
    882   typedef StorageMemory<CoeffReturnType, Device> Storage;
    883   typedef typename Storage::Type EvaluatorPointerType;
    884   typedef Strides Dimensions;
    885 
    886   enum {
    887     // Alignment can't be guaranteed at compile time since it depends on the
    888     // slice offsets and sizes.
    889     IsAligned = false,
    890     PacketAccess = false,
    891     BlockAccess = false,
    892     PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
    893     Layout = TensorEvaluator<ArgType, Device>::Layout,
    894     RawAccess = false
    895   };
    896 
    897   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
    898   typedef internal::TensorBlockNotImplemented TensorBlock;
    899   //===--------------------------------------------------------------------===//
    900 
    901   EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    902       : m_impl(op.expression(), device),
    903         m_device(device),
    904         m_strides(op.strides())
    905   {
    906     // Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero
    907     DSizes<Index, NumDims> startIndicesClamped, stopIndicesClamped;
    908     for (ptrdiff_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
    909       eigen_assert(m_strides[i] != 0 && "0 stride is invalid");
    910       if (m_strides[i] > 0) {
    911         startIndicesClamped[i] =
    912             clamp(op.startIndices()[i], 0, m_impl.dimensions()[i]);
    913         stopIndicesClamped[i] =
    914             clamp(op.stopIndices()[i], 0, m_impl.dimensions()[i]);
    915       } else {
    916         /* implies m_strides[i] < 0 by assert */
    917         startIndicesClamped[i] =
    918             clamp(op.startIndices()[i], -1, m_impl.dimensions()[i] - 1);
    919         stopIndicesClamped[i] =
    920             clamp(op.stopIndices()[i], -1, m_impl.dimensions()[i] - 1);
    921       }
    922       m_startIndices[i] = startIndicesClamped[i];
    923     }
    924 
    925     typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
    926     const InputDimensions& input_dims = m_impl.dimensions();
    927 
    928     // compute output tensor shape
    929     m_is_identity = true;
    930     for (int i = 0; i < NumDims; i++) {
    931       Index interval = stopIndicesClamped[i] - startIndicesClamped[i];
    932       if (interval == 0 || ((interval < 0) != (m_strides[i] < 0))) {
    933         m_dimensions[i] = 0;
    934       } else {
    935         m_dimensions[i] =
    936             (interval / m_strides[i]) + (interval % m_strides[i] != 0 ? 1 : 0);
    937         eigen_assert(m_dimensions[i] >= 0);
    938       }
    939       if (m_strides[i] != 1 || interval != m_impl.dimensions()[i]) {
    940         m_is_identity = false;
    941       }
    942     }
    943 
    944     Strides output_dims = m_dimensions;
    945 
    946     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
    947       m_inputStrides[0] = m_strides[0];
    948       m_offsets[0] = startIndicesClamped[0];
    949       Index previousDimProduct = 1;
    950       for (int i = 1; i < NumDims; ++i) {
    951         previousDimProduct *= input_dims[i-1];
    952         m_inputStrides[i] = previousDimProduct * m_strides[i];
    953         m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
    954       }
    955 
    956       // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed.
    957       m_outputStrides[0] = 1;
    958       for (int i = 1; i < NumDims; ++i) {
    959         m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
    960         m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
    961       }
    962     } else {
    963       m_inputStrides[NumDims-1] = m_strides[NumDims-1];
    964       m_offsets[NumDims-1] = startIndicesClamped[NumDims-1];
    965       Index previousDimProduct = 1;
    966       for (int i = NumDims - 2; i >= 0; --i) {
    967         previousDimProduct *= input_dims[i+1];
    968         m_inputStrides[i] = previousDimProduct * m_strides[i];
    969         m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
    970       }
    971 
    972       m_outputStrides[NumDims-1] = 1;
    973       for (int i = NumDims - 2; i >= 0; --i) {
    974         m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
    975         m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
    976       }
    977     }
    978   }
    979 
    980   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
    981 
    982 
    983   EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    984     m_impl.evalSubExprsIfNeeded(NULL);
    985     return true;
    986   }
    987 
    988   EIGEN_STRONG_INLINE void cleanup() {
    989     m_impl.cleanup();
    990   }
    991 
    992   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
    993   {
    994     if (m_is_identity) {
    995       return m_impl.coeff(index);
    996     } else {
    997       return m_impl.coeff(srcCoeff(index));
    998     }
    999   }
   1000 
   1001   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
   1002     return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, m_is_identity ? 1 : NumDims);
   1003   }
   1004 
   1005   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const {
   1006     return NULL;
   1007   }
   1008 #ifdef EIGEN_USE_SYCL
   1009   // binding placeholder accessors to a command group handler for SYCL
   1010   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
   1011     m_impl.bind(cgh);
   1012   }
   1013 #endif
   1014  protected:
   1015   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
   1016   {
   1017     Index inputIndex = 0;
   1018     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
   1019       EIGEN_UNROLL_LOOP
   1020       for (int i = NumDims - 1; i >= 0; --i) {
   1021         const Index idx = index / m_fastOutputStrides[i];
   1022         inputIndex += idx * m_inputStrides[i] + m_offsets[i];
   1023         index -= idx * m_outputStrides[i];
   1024       }
   1025     } else {
   1026       EIGEN_UNROLL_LOOP
   1027       for (int i = 0; i < NumDims; ++i) {
   1028         const Index idx = index / m_fastOutputStrides[i];
   1029         inputIndex += idx * m_inputStrides[i] + m_offsets[i];
   1030         index -= idx * m_outputStrides[i];
   1031       }
   1032     }
   1033     return inputIndex;
   1034   }
   1035 
   1036   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index clamp(Index value, Index min, Index max) {
   1037 #ifndef SYCL_DEVICE_ONLY
   1038     return numext::maxi(min, numext::mini(max,value));
   1039 #else
   1040     return cl::sycl::clamp(value, min, max);
   1041 #endif
   1042   }
   1043 
   1044   array<Index, NumDims> m_outputStrides;
   1045   array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
   1046   array<Index, NumDims> m_inputStrides;
   1047   bool m_is_identity;
   1048   TensorEvaluator<ArgType, Device> m_impl;
   1049   const Device EIGEN_DEVICE_REF m_device;
   1050   DSizes<Index, NumDims> m_startIndices; // clamped startIndices
   1051   DSizes<Index, NumDims> m_dimensions;
   1052   DSizes<Index, NumDims> m_offsets; // offset in a flattened shape
   1053   const Strides m_strides;
   1054 };
   1055 
   1056 // Eval as lvalue
   1057 template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
   1058 struct TensorEvaluator<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
   1059   : public TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
   1060 {
   1061   typedef TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> Base;
   1062   typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
   1063   static const int NumDims = internal::array_size<Strides>::value;
   1064 
   1065   enum {
   1066     IsAligned = false,
   1067     PacketAccess = false,
   1068     BlockAccess = false,
   1069     PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
   1070     Layout = TensorEvaluator<ArgType, Device>::Layout,
   1071     CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess,
   1072     RawAccess = false
   1073   };
   1074 
   1075   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
   1076   typedef internal::TensorBlockNotImplemented TensorBlock;
   1077   //===--------------------------------------------------------------------===//
   1078 
   1079   EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
   1080     : Base(op, device)
   1081     { }
   1082 
   1083   typedef typename XprType::Index Index;
   1084   typedef typename XprType::Scalar Scalar;
   1085   typedef typename XprType::CoeffReturnType CoeffReturnType;
   1086   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
   1087   typedef Strides Dimensions;
   1088 
   1089   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
   1090   {
   1091     if (this->m_is_identity) {
   1092       return this->m_impl.coeffRef(index);
   1093     } else {
   1094       return this->m_impl.coeffRef(this->srcCoeff(index));
   1095     }
   1096   }
   1097 };
   1098 
   1099 
   1100 } // end namespace Eigen
   1101 
   1102 #endif // EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H