TensorEvaluator.h (40005B)
1 // This file is part of Eigen, a lightweight C++ template library 2 // for linear algebra. 3 // 4 // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> 5 // 6 // This Source Code Form is subject to the terms of the Mozilla 7 // Public License v. 2.0. If a copy of the MPL was not distributed 8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 9 10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H 11 #define EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H 12 13 namespace Eigen { 14 15 /** \class TensorEvaluator 16 * \ingroup CXX11_Tensor_Module 17 * 18 * \brief The tensor evaluator classes. 19 * 20 * These classes are responsible for the evaluation of the tensor expression. 21 * 22 * TODO: add support for more types of expressions, in particular expressions 23 * leading to lvalues (slicing, reshaping, etc...) 24 */ 25 26 // Generic evaluator 27 template<typename Derived, typename Device> 28 struct TensorEvaluator 29 { 30 typedef typename Derived::Index Index; 31 typedef typename Derived::Scalar Scalar; 32 typedef typename Derived::Scalar CoeffReturnType; 33 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; 34 typedef typename Derived::Dimensions Dimensions; 35 typedef Derived XprType; 36 static const int PacketSize = PacketType<CoeffReturnType, Device>::size; 37 typedef typename internal::traits<Derived>::template MakePointer<Scalar>::Type TensorPointerType; 38 typedef StorageMemory<Scalar, Device> Storage; 39 typedef typename Storage::Type EvaluatorPointerType; 40 41 // NumDimensions is -1 for variable dim tensors 42 static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ? 43 internal::traits<Derived>::NumDimensions : 0; 44 45 enum { 46 IsAligned = Derived::IsAligned, 47 PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1), 48 BlockAccess = internal::is_arithmetic<typename internal::remove_const<Scalar>::type>::value, 49 PreferBlockAccess = false, 50 Layout = Derived::Layout, 51 CoordAccess = NumCoords > 0, 52 RawAccess = true 53 }; 54 55 typedef typename internal::remove_const<Scalar>::type ScalarNoConst; 56 57 //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// 58 typedef internal::TensorBlockDescriptor<NumCoords, Index> TensorBlockDesc; 59 typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch; 60 61 typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumCoords, 62 Layout, Index> 63 TensorBlock; 64 //===--------------------------------------------------------------------===// 65 66 EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device) 67 : m_data(device.get((const_cast<TensorPointerType>(m.data())))), 68 m_dims(m.dimensions()), 69 m_device(device) 70 { } 71 72 73 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; } 74 75 EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType dest) { 76 if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && dest) { 77 m_device.memcpy((void*)(m_device.get(dest)), m_device.get(m_data), m_dims.TotalSize() * sizeof(Scalar)); 78 return false; 79 } 80 return true; 81 } 82 83 #ifdef EIGEN_USE_THREADS 84 template <typename EvalSubExprsCallback> 85 EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync( 86 EvaluatorPointerType dest, EvalSubExprsCallback done) { 87 // TODO(ezhulenev): ThreadPoolDevice memcpy is blockign operation. 88 done(evalSubExprsIfNeeded(dest)); 89 } 90 #endif // EIGEN_USE_THREADS 91 92 EIGEN_STRONG_INLINE void cleanup() {} 93 94 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { 95 eigen_assert(m_data != NULL); 96 return m_data[index]; 97 } 98 99 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) { 100 eigen_assert(m_data != NULL); 101 return m_data[index]; 102 } 103 104 template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 105 PacketReturnType packet(Index index) const 106 { 107 return internal::ploadt<PacketReturnType, LoadMode>(m_data + index); 108 } 109 110 // Return a packet starting at `index` where `umask` specifies which elements 111 // have to be loaded. Type/size of mask depends on PacketReturnType, e.g. for 112 // Packet16f, `umask` is of type uint16_t and if a bit is 1, corresponding 113 // float element will be loaded, otherwise 0 will be loaded. 114 // Function has been templatized to enable Sfinae. 115 template <typename PacketReturnTypeT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 116 typename internal::enable_if<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>::type 117 partialPacket(Index index, typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask) const 118 { 119 return internal::ploadu<PacketReturnTypeT>(m_data + index, umask); 120 } 121 122 template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 123 void writePacket(Index index, const PacketReturnType& x) 124 { 125 return internal::pstoret<Scalar, PacketReturnType, StoreMode>(m_data + index, x); 126 } 127 128 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const { 129 eigen_assert(m_data != NULL); 130 if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { 131 return m_data[m_dims.IndexOfColMajor(coords)]; 132 } else { 133 return m_data[m_dims.IndexOfRowMajor(coords)]; 134 } 135 } 136 137 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& 138 coeffRef(const array<DenseIndex, NumCoords>& coords) { 139 eigen_assert(m_data != NULL); 140 if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { 141 return m_data[m_dims.IndexOfColMajor(coords)]; 142 } else { 143 return m_data[m_dims.IndexOfRowMajor(coords)]; 144 } 145 } 146 147 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { 148 return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, 149 PacketType<CoeffReturnType, Device>::size); 150 } 151 152 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 153 internal::TensorBlockResourceRequirements getResourceRequirements() const { 154 return internal::TensorBlockResourceRequirements::any(); 155 } 156 157 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock 158 block(TensorBlockDesc& desc, TensorBlockScratch& scratch, 159 bool /*root_of_expr_ast*/ = false) const { 160 assert(m_data != NULL); 161 return TensorBlock::materialize(m_data, m_dims, desc, scratch); 162 } 163 164 template<typename TensorBlock> 165 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock( 166 const TensorBlockDesc& desc, const TensorBlock& block) { 167 assert(m_data != NULL); 168 169 typedef typename TensorBlock::XprType TensorBlockExpr; 170 typedef internal::TensorBlockAssignment<Scalar, NumCoords, TensorBlockExpr, 171 Index> 172 TensorBlockAssign; 173 174 TensorBlockAssign::Run( 175 TensorBlockAssign::target(desc.dimensions(), 176 internal::strides<Layout>(m_dims), m_data, 177 desc.offset()), 178 block.expr()); 179 } 180 181 EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; } 182 183 #ifdef EIGEN_USE_SYCL 184 // binding placeholder accessors to a command group handler for SYCL 185 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { 186 m_data.bind(cgh); 187 } 188 #endif 189 protected: 190 EvaluatorPointerType m_data; 191 Dimensions m_dims; 192 const Device EIGEN_DEVICE_REF m_device; 193 }; 194 195 namespace { 196 template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE 197 T loadConstant(const T* address) { 198 return *address; 199 } 200 // Use the texture cache on CUDA devices whenever possible 201 #if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350 202 template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE 203 float loadConstant(const float* address) { 204 return __ldg(address); 205 } 206 template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE 207 double loadConstant(const double* address) { 208 return __ldg(address); 209 } 210 template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE 211 Eigen::half loadConstant(const Eigen::half* address) { 212 return Eigen::half(half_impl::raw_uint16_to_half(__ldg(&address->x))); 213 } 214 #endif 215 #ifdef EIGEN_USE_SYCL 216 // overload of load constant should be implemented here based on range access 217 template <cl::sycl::access::mode AcMd, typename T> 218 T &loadConstant(const Eigen::TensorSycl::internal::RangeAccess<AcMd, T> &address) { 219 return *address; 220 } 221 #endif 222 } 223 224 225 // Default evaluator for rvalues 226 template<typename Derived, typename Device> 227 struct TensorEvaluator<const Derived, Device> 228 { 229 typedef typename Derived::Index Index; 230 typedef typename Derived::Scalar Scalar; 231 typedef typename Derived::Scalar CoeffReturnType; 232 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; 233 typedef typename Derived::Dimensions Dimensions; 234 typedef const Derived XprType; 235 typedef typename internal::traits<Derived>::template MakePointer<const Scalar>::Type TensorPointerType; 236 typedef StorageMemory<const Scalar, Device> Storage; 237 typedef typename Storage::Type EvaluatorPointerType; 238 239 typedef typename internal::remove_const<Scalar>::type ScalarNoConst; 240 241 // NumDimensions is -1 for variable dim tensors 242 static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ? 243 internal::traits<Derived>::NumDimensions : 0; 244 static const int PacketSize = PacketType<CoeffReturnType, Device>::size; 245 246 enum { 247 IsAligned = Derived::IsAligned, 248 PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1), 249 BlockAccess = internal::is_arithmetic<ScalarNoConst>::value, 250 PreferBlockAccess = false, 251 Layout = Derived::Layout, 252 CoordAccess = NumCoords > 0, 253 RawAccess = true 254 }; 255 256 //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// 257 typedef internal::TensorBlockDescriptor<NumCoords, Index> TensorBlockDesc; 258 typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch; 259 260 typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumCoords, 261 Layout, Index> 262 TensorBlock; 263 //===--------------------------------------------------------------------===// 264 265 EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device) 266 : m_data(device.get(m.data())), m_dims(m.dimensions()), m_device(device) 267 { } 268 269 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; } 270 271 EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) { 272 if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data) { 273 m_device.memcpy((void*)(m_device.get(data)),m_device.get(m_data), m_dims.TotalSize() * sizeof(Scalar)); 274 return false; 275 } 276 return true; 277 } 278 279 #ifdef EIGEN_USE_THREADS 280 template <typename EvalSubExprsCallback> 281 EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync( 282 EvaluatorPointerType dest, EvalSubExprsCallback done) { 283 // TODO(ezhulenev): ThreadPoolDevice memcpy is a blockign operation. 284 done(evalSubExprsIfNeeded(dest)); 285 } 286 #endif // EIGEN_USE_THREADS 287 288 EIGEN_STRONG_INLINE void cleanup() { } 289 290 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { 291 eigen_assert(m_data != NULL); 292 return loadConstant(m_data+index); 293 } 294 295 template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 296 PacketReturnType packet(Index index) const 297 { 298 return internal::ploadt_ro<PacketReturnType, LoadMode>(m_data + index); 299 } 300 301 // Return a packet starting at `index` where `umask` specifies which elements 302 // have to be loaded. Type/size of mask depends on PacketReturnType, e.g. for 303 // Packet16f, `umask` is of type uint16_t and if a bit is 1, corresponding 304 // float element will be loaded, otherwise 0 will be loaded. 305 // Function has been templatized to enable Sfinae. 306 template <typename PacketReturnTypeT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 307 typename internal::enable_if<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>::type 308 partialPacket(Index index, typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask) const 309 { 310 return internal::ploadu<PacketReturnTypeT>(m_data + index, umask); 311 } 312 313 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const { 314 eigen_assert(m_data != NULL); 315 const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords) 316 : m_dims.IndexOfRowMajor(coords); 317 return loadConstant(m_data+index); 318 } 319 320 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { 321 return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, 322 PacketType<CoeffReturnType, Device>::size); 323 } 324 325 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 326 internal::TensorBlockResourceRequirements getResourceRequirements() const { 327 return internal::TensorBlockResourceRequirements::any(); 328 } 329 330 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock 331 block(TensorBlockDesc& desc, TensorBlockScratch& scratch, 332 bool /*root_of_expr_ast*/ = false) const { 333 assert(m_data != NULL); 334 return TensorBlock::materialize(m_data, m_dims, desc, scratch); 335 } 336 337 EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; } 338 #ifdef EIGEN_USE_SYCL 339 // binding placeholder accessors to a command group handler for SYCL 340 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { 341 m_data.bind(cgh); 342 } 343 #endif 344 protected: 345 EvaluatorPointerType m_data; 346 Dimensions m_dims; 347 const Device EIGEN_DEVICE_REF m_device; 348 }; 349 350 351 352 353 // -------------------- CwiseNullaryOp -------------------- 354 355 template<typename NullaryOp, typename ArgType, typename Device> 356 struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device> 357 { 358 typedef TensorCwiseNullaryOp<NullaryOp, ArgType> XprType; 359 360 TensorEvaluator(const XprType& op, const Device& device) 361 : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device), m_wrapper() 362 { } 363 364 typedef typename XprType::Index Index; 365 typedef typename XprType::Scalar Scalar; 366 typedef typename internal::traits<XprType>::Scalar CoeffReturnType; 367 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; 368 static const int PacketSize = PacketType<CoeffReturnType, Device>::size; 369 typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions; 370 typedef StorageMemory<CoeffReturnType, Device> Storage; 371 typedef typename Storage::Type EvaluatorPointerType; 372 373 enum { 374 IsAligned = true, 375 PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess 376 #ifdef EIGEN_USE_SYCL 377 && (PacketType<CoeffReturnType, Device>::size >1) 378 #endif 379 , 380 BlockAccess = false, 381 PreferBlockAccess = false, 382 Layout = TensorEvaluator<ArgType, Device>::Layout, 383 CoordAccess = false, // to be implemented 384 RawAccess = false 385 }; 386 387 //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// 388 typedef internal::TensorBlockNotImplemented TensorBlock; 389 //===--------------------------------------------------------------------===// 390 391 EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); } 392 393 EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { return true; } 394 395 #ifdef EIGEN_USE_THREADS 396 template <typename EvalSubExprsCallback> 397 EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync( 398 EvaluatorPointerType, EvalSubExprsCallback done) { 399 done(true); 400 } 401 #endif // EIGEN_USE_THREADS 402 403 EIGEN_STRONG_INLINE void cleanup() { } 404 405 EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const 406 { 407 return m_wrapper(m_functor, index); 408 } 409 410 template<int LoadMode> 411 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const 412 { 413 return m_wrapper.template packetOp<PacketReturnType, Index>(m_functor, index); 414 } 415 416 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost 417 costPerCoeff(bool vectorized) const { 418 return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, 419 PacketType<CoeffReturnType, Device>::size); 420 } 421 422 EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; } 423 424 #ifdef EIGEN_USE_SYCL 425 // binding placeholder accessors to a command group handler for SYCL 426 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { 427 m_argImpl.bind(cgh); 428 } 429 #endif 430 431 private: 432 const NullaryOp m_functor; 433 TensorEvaluator<ArgType, Device> m_argImpl; 434 const internal::nullary_wrapper<CoeffReturnType,NullaryOp> m_wrapper; 435 }; 436 437 438 439 // -------------------- CwiseUnaryOp -------------------- 440 441 template<typename UnaryOp, typename ArgType, typename Device> 442 struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device> 443 { 444 typedef TensorCwiseUnaryOp<UnaryOp, ArgType> XprType; 445 446 enum { 447 IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, 448 PacketAccess = int(TensorEvaluator<ArgType, Device>::PacketAccess) & 449 int(internal::functor_traits<UnaryOp>::PacketAccess), 450 BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess, 451 PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess, 452 Layout = TensorEvaluator<ArgType, Device>::Layout, 453 CoordAccess = false, // to be implemented 454 RawAccess = false 455 }; 456 457 TensorEvaluator(const XprType& op, const Device& device) 458 : m_device(device), 459 m_functor(op.functor()), 460 m_argImpl(op.nestedExpression(), device) 461 { } 462 463 typedef typename XprType::Index Index; 464 typedef typename XprType::Scalar Scalar; 465 typedef typename internal::remove_const<Scalar>::type ScalarNoConst; 466 typedef typename internal::traits<XprType>::Scalar CoeffReturnType; 467 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; 468 static const int PacketSize = PacketType<CoeffReturnType, Device>::size; 469 typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions; 470 typedef StorageMemory<CoeffReturnType, Device> Storage; 471 typedef typename Storage::Type EvaluatorPointerType; 472 static const int NumDims = internal::array_size<Dimensions>::value; 473 474 //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// 475 typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc; 476 typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch; 477 478 typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock 479 ArgTensorBlock; 480 481 typedef internal::TensorCwiseUnaryBlock<UnaryOp, ArgTensorBlock> 482 TensorBlock; 483 //===--------------------------------------------------------------------===// 484 485 EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); } 486 487 EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { 488 m_argImpl.evalSubExprsIfNeeded(NULL); 489 return true; 490 } 491 492 #ifdef EIGEN_USE_THREADS 493 template <typename EvalSubExprsCallback> 494 EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync( 495 EvaluatorPointerType, EvalSubExprsCallback done) { 496 m_argImpl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); }); 497 } 498 #endif // EIGEN_USE_THREADS 499 500 EIGEN_STRONG_INLINE void cleanup() { 501 m_argImpl.cleanup(); 502 } 503 504 EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const 505 { 506 return m_functor(m_argImpl.coeff(index)); 507 } 508 509 template<int LoadMode> 510 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const 511 { 512 return m_functor.packetOp(m_argImpl.template packet<LoadMode>(index)); 513 } 514 515 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { 516 const double functor_cost = internal::functor_traits<UnaryOp>::Cost; 517 return m_argImpl.costPerCoeff(vectorized) + 518 TensorOpCost(0, 0, functor_cost, vectorized, PacketSize); 519 } 520 521 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 522 internal::TensorBlockResourceRequirements getResourceRequirements() const { 523 static const double functor_cost = internal::functor_traits<UnaryOp>::Cost; 524 return m_argImpl.getResourceRequirements().addCostPerCoeff( 525 {0, 0, functor_cost / PacketSize}); 526 } 527 528 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock 529 block(TensorBlockDesc& desc, TensorBlockScratch& scratch, 530 bool /*root_of_expr_ast*/ = false) const { 531 return TensorBlock(m_argImpl.block(desc, scratch), m_functor); 532 } 533 534 EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; } 535 536 #ifdef EIGEN_USE_SYCL 537 // binding placeholder accessors to a command group handler for SYCL 538 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const{ 539 m_argImpl.bind(cgh); 540 } 541 #endif 542 543 544 private: 545 const Device EIGEN_DEVICE_REF m_device; 546 const UnaryOp m_functor; 547 TensorEvaluator<ArgType, Device> m_argImpl; 548 }; 549 550 551 // -------------------- CwiseBinaryOp -------------------- 552 553 template<typename BinaryOp, typename LeftArgType, typename RightArgType, typename Device> 554 struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType>, Device> 555 { 556 typedef TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> XprType; 557 558 enum { 559 IsAligned = int(TensorEvaluator<LeftArgType, Device>::IsAligned) & 560 int(TensorEvaluator<RightArgType, Device>::IsAligned), 561 PacketAccess = int(TensorEvaluator<LeftArgType, Device>::PacketAccess) & 562 int(TensorEvaluator<RightArgType, Device>::PacketAccess) & 563 int(internal::functor_traits<BinaryOp>::PacketAccess), 564 BlockAccess = int(TensorEvaluator<LeftArgType, Device>::BlockAccess) & 565 int(TensorEvaluator<RightArgType, Device>::BlockAccess), 566 PreferBlockAccess = int(TensorEvaluator<LeftArgType, Device>::PreferBlockAccess) | 567 int(TensorEvaluator<RightArgType, Device>::PreferBlockAccess), 568 Layout = TensorEvaluator<LeftArgType, Device>::Layout, 569 CoordAccess = false, // to be implemented 570 RawAccess = false 571 }; 572 573 TensorEvaluator(const XprType& op, const Device& device) 574 : m_device(device), 575 m_functor(op.functor()), 576 m_leftImpl(op.lhsExpression(), device), 577 m_rightImpl(op.rhsExpression(), device) 578 { 579 EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout) || internal::traits<XprType>::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE); 580 eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions())); 581 } 582 583 typedef typename XprType::Index Index; 584 typedef typename XprType::Scalar Scalar; 585 typedef typename internal::traits<XprType>::Scalar CoeffReturnType; 586 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; 587 static const int PacketSize = PacketType<CoeffReturnType, Device>::size; 588 typedef typename TensorEvaluator<LeftArgType, Device>::Dimensions Dimensions; 589 typedef StorageMemory<CoeffReturnType, Device> Storage; 590 typedef typename Storage::Type EvaluatorPointerType; 591 592 static const int NumDims = internal::array_size< 593 typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value; 594 595 //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// 596 typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc; 597 typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch; 598 599 typedef typename TensorEvaluator<const LeftArgType, Device>::TensorBlock 600 LeftTensorBlock; 601 typedef typename TensorEvaluator<const RightArgType, Device>::TensorBlock 602 RightTensorBlock; 603 604 typedef internal::TensorCwiseBinaryBlock<BinaryOp, LeftTensorBlock, 605 RightTensorBlock> 606 TensorBlock; 607 //===--------------------------------------------------------------------===// 608 609 EIGEN_DEVICE_FUNC const Dimensions& dimensions() const 610 { 611 // TODO: use right impl instead if right impl dimensions are known at compile time. 612 return m_leftImpl.dimensions(); 613 } 614 615 EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { 616 m_leftImpl.evalSubExprsIfNeeded(NULL); 617 m_rightImpl.evalSubExprsIfNeeded(NULL); 618 return true; 619 } 620 621 #ifdef EIGEN_USE_THREADS 622 template <typename EvalSubExprsCallback> 623 EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync( 624 EvaluatorPointerType, EvalSubExprsCallback done) { 625 // TODO(ezhulenev): Evaluate two expression in parallel? 626 m_leftImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) { 627 m_rightImpl.evalSubExprsIfNeededAsync(nullptr, 628 [done](bool) { done(true); }); 629 }); 630 } 631 #endif // EIGEN_USE_THREADS 632 633 EIGEN_STRONG_INLINE void cleanup() { 634 m_leftImpl.cleanup(); 635 m_rightImpl.cleanup(); 636 } 637 638 EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const 639 { 640 return m_functor(m_leftImpl.coeff(index), m_rightImpl.coeff(index)); 641 } 642 template<int LoadMode> 643 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const 644 { 645 return m_functor.packetOp(m_leftImpl.template packet<LoadMode>(index), m_rightImpl.template packet<LoadMode>(index)); 646 } 647 648 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost 649 costPerCoeff(bool vectorized) const { 650 const double functor_cost = internal::functor_traits<BinaryOp>::Cost; 651 return m_leftImpl.costPerCoeff(vectorized) + 652 m_rightImpl.costPerCoeff(vectorized) + 653 TensorOpCost(0, 0, functor_cost, vectorized, PacketSize); 654 } 655 656 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 657 internal::TensorBlockResourceRequirements getResourceRequirements() const { 658 static const double functor_cost = internal::functor_traits<BinaryOp>::Cost; 659 return internal::TensorBlockResourceRequirements::merge( 660 m_leftImpl.getResourceRequirements(), 661 m_rightImpl.getResourceRequirements()) 662 .addCostPerCoeff({0, 0, functor_cost / PacketSize}); 663 } 664 665 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock 666 block(TensorBlockDesc& desc, TensorBlockScratch& scratch, 667 bool /*root_of_expr_ast*/ = false) const { 668 desc.DropDestinationBuffer(); 669 return TensorBlock(m_leftImpl.block(desc, scratch), 670 m_rightImpl.block(desc, scratch), m_functor); 671 } 672 673 EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; } 674 675 #ifdef EIGEN_USE_SYCL 676 // binding placeholder accessors to a command group handler for SYCL 677 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { 678 m_leftImpl.bind(cgh); 679 m_rightImpl.bind(cgh); 680 } 681 #endif 682 private: 683 const Device EIGEN_DEVICE_REF m_device; 684 const BinaryOp m_functor; 685 TensorEvaluator<LeftArgType, Device> m_leftImpl; 686 TensorEvaluator<RightArgType, Device> m_rightImpl; 687 }; 688 689 // -------------------- CwiseTernaryOp -------------------- 690 691 template<typename TernaryOp, typename Arg1Type, typename Arg2Type, typename Arg3Type, typename Device> 692 struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type>, Device> 693 { 694 typedef TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type> XprType; 695 696 enum { 697 IsAligned = TensorEvaluator<Arg1Type, Device>::IsAligned & TensorEvaluator<Arg2Type, Device>::IsAligned & TensorEvaluator<Arg3Type, Device>::IsAligned, 698 PacketAccess = TensorEvaluator<Arg1Type, Device>::PacketAccess && 699 TensorEvaluator<Arg2Type, Device>::PacketAccess && 700 TensorEvaluator<Arg3Type, Device>::PacketAccess && 701 internal::functor_traits<TernaryOp>::PacketAccess, 702 BlockAccess = false, 703 PreferBlockAccess = TensorEvaluator<Arg1Type, Device>::PreferBlockAccess || 704 TensorEvaluator<Arg2Type, Device>::PreferBlockAccess || 705 TensorEvaluator<Arg3Type, Device>::PreferBlockAccess, 706 Layout = TensorEvaluator<Arg1Type, Device>::Layout, 707 CoordAccess = false, // to be implemented 708 RawAccess = false 709 }; 710 711 TensorEvaluator(const XprType& op, const Device& device) 712 : m_functor(op.functor()), 713 m_arg1Impl(op.arg1Expression(), device), 714 m_arg2Impl(op.arg2Expression(), device), 715 m_arg3Impl(op.arg3Expression(), device) 716 { 717 EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<Arg1Type, Device>::Layout) == static_cast<int>(TensorEvaluator<Arg3Type, Device>::Layout) || internal::traits<XprType>::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE); 718 719 EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind, 720 typename internal::traits<Arg2Type>::StorageKind>::value), 721 STORAGE_KIND_MUST_MATCH) 722 EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind, 723 typename internal::traits<Arg3Type>::StorageKind>::value), 724 STORAGE_KIND_MUST_MATCH) 725 EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index, 726 typename internal::traits<Arg2Type>::Index>::value), 727 STORAGE_INDEX_MUST_MATCH) 728 EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index, 729 typename internal::traits<Arg3Type>::Index>::value), 730 STORAGE_INDEX_MUST_MATCH) 731 732 eigen_assert(dimensions_match(m_arg1Impl.dimensions(), m_arg2Impl.dimensions()) && dimensions_match(m_arg1Impl.dimensions(), m_arg3Impl.dimensions())); 733 } 734 735 typedef typename XprType::Index Index; 736 typedef typename XprType::Scalar Scalar; 737 typedef typename internal::traits<XprType>::Scalar CoeffReturnType; 738 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; 739 static const int PacketSize = PacketType<CoeffReturnType, Device>::size; 740 typedef typename TensorEvaluator<Arg1Type, Device>::Dimensions Dimensions; 741 typedef StorageMemory<CoeffReturnType, Device> Storage; 742 typedef typename Storage::Type EvaluatorPointerType; 743 744 //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// 745 typedef internal::TensorBlockNotImplemented TensorBlock; 746 //===--------------------------------------------------------------------===// 747 748 EIGEN_DEVICE_FUNC const Dimensions& dimensions() const 749 { 750 // TODO: use arg2 or arg3 dimensions if they are known at compile time. 751 return m_arg1Impl.dimensions(); 752 } 753 754 EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { 755 m_arg1Impl.evalSubExprsIfNeeded(NULL); 756 m_arg2Impl.evalSubExprsIfNeeded(NULL); 757 m_arg3Impl.evalSubExprsIfNeeded(NULL); 758 return true; 759 } 760 EIGEN_STRONG_INLINE void cleanup() { 761 m_arg1Impl.cleanup(); 762 m_arg2Impl.cleanup(); 763 m_arg3Impl.cleanup(); 764 } 765 766 EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const 767 { 768 return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index)); 769 } 770 template<int LoadMode> 771 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const 772 { 773 return m_functor.packetOp(m_arg1Impl.template packet<LoadMode>(index), 774 m_arg2Impl.template packet<LoadMode>(index), 775 m_arg3Impl.template packet<LoadMode>(index)); 776 } 777 778 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost 779 costPerCoeff(bool vectorized) const { 780 const double functor_cost = internal::functor_traits<TernaryOp>::Cost; 781 return m_arg1Impl.costPerCoeff(vectorized) + 782 m_arg2Impl.costPerCoeff(vectorized) + 783 m_arg3Impl.costPerCoeff(vectorized) + 784 TensorOpCost(0, 0, functor_cost, vectorized, PacketSize); 785 } 786 787 EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; } 788 789 #ifdef EIGEN_USE_SYCL 790 // binding placeholder accessors to a command group handler for SYCL 791 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { 792 m_arg1Impl.bind(cgh); 793 m_arg2Impl.bind(cgh); 794 m_arg3Impl.bind(cgh); 795 } 796 #endif 797 798 private: 799 const TernaryOp m_functor; 800 TensorEvaluator<Arg1Type, Device> m_arg1Impl; 801 TensorEvaluator<Arg2Type, Device> m_arg2Impl; 802 TensorEvaluator<Arg3Type, Device> m_arg3Impl; 803 }; 804 805 806 // -------------------- SelectOp -------------------- 807 808 template<typename IfArgType, typename ThenArgType, typename ElseArgType, typename Device> 809 struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>, Device> 810 { 811 typedef TensorSelectOp<IfArgType, ThenArgType, ElseArgType> XprType; 812 typedef typename XprType::Scalar Scalar; 813 814 enum { 815 IsAligned = TensorEvaluator<ThenArgType, Device>::IsAligned & 816 TensorEvaluator<ElseArgType, Device>::IsAligned, 817 PacketAccess = TensorEvaluator<ThenArgType, Device>::PacketAccess & 818 TensorEvaluator<ElseArgType, Device>::PacketAccess & 819 PacketType<Scalar, Device>::HasBlend, 820 BlockAccess = TensorEvaluator<IfArgType, Device>::BlockAccess && 821 TensorEvaluator<ThenArgType, Device>::BlockAccess && 822 TensorEvaluator<ElseArgType, Device>::BlockAccess, 823 PreferBlockAccess = TensorEvaluator<IfArgType, Device>::PreferBlockAccess || 824 TensorEvaluator<ThenArgType, Device>::PreferBlockAccess || 825 TensorEvaluator<ElseArgType, Device>::PreferBlockAccess, 826 Layout = TensorEvaluator<IfArgType, Device>::Layout, 827 CoordAccess = false, // to be implemented 828 RawAccess = false 829 }; 830 831 TensorEvaluator(const XprType& op, const Device& device) 832 : m_condImpl(op.ifExpression(), device), 833 m_thenImpl(op.thenExpression(), device), 834 m_elseImpl(op.elseExpression(), device) 835 { 836 EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<IfArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<ThenArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); 837 EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<IfArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<ElseArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); 838 eigen_assert(dimensions_match(m_condImpl.dimensions(), m_thenImpl.dimensions())); 839 eigen_assert(dimensions_match(m_thenImpl.dimensions(), m_elseImpl.dimensions())); 840 } 841 842 typedef typename XprType::Index Index; 843 typedef typename internal::traits<XprType>::Scalar CoeffReturnType; 844 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; 845 static const int PacketSize = PacketType<CoeffReturnType, Device>::size; 846 typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions; 847 typedef StorageMemory<CoeffReturnType, Device> Storage; 848 typedef typename Storage::Type EvaluatorPointerType; 849 850 static const int NumDims = internal::array_size<Dimensions>::value; 851 852 //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// 853 typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc; 854 typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch; 855 856 typedef typename TensorEvaluator<const IfArgType, Device>::TensorBlock 857 IfArgTensorBlock; 858 typedef typename TensorEvaluator<const ThenArgType, Device>::TensorBlock 859 ThenArgTensorBlock; 860 typedef typename TensorEvaluator<const ElseArgType, Device>::TensorBlock 861 ElseArgTensorBlock; 862 863 struct TensorSelectOpBlockFactory { 864 template <typename IfArgXprType, typename ThenArgXprType, typename ElseArgXprType> 865 struct XprType { 866 typedef TensorSelectOp<const IfArgXprType, const ThenArgXprType, const ElseArgXprType> type; 867 }; 868 869 template <typename IfArgXprType, typename ThenArgXprType, typename ElseArgXprType> 870 typename XprType<IfArgXprType, ThenArgXprType, ElseArgXprType>::type expr( 871 const IfArgXprType& if_expr, const ThenArgXprType& then_expr, const ElseArgXprType& else_expr) const { 872 return typename XprType<IfArgXprType, ThenArgXprType, ElseArgXprType>::type(if_expr, then_expr, else_expr); 873 } 874 }; 875 876 typedef internal::TensorTernaryExprBlock<TensorSelectOpBlockFactory, 877 IfArgTensorBlock, ThenArgTensorBlock, 878 ElseArgTensorBlock> 879 TensorBlock; 880 //===--------------------------------------------------------------------===// 881 882 EIGEN_DEVICE_FUNC const Dimensions& dimensions() const 883 { 884 // TODO: use then or else impl instead if they happen to be known at compile time. 885 return m_condImpl.dimensions(); 886 } 887 888 EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { 889 m_condImpl.evalSubExprsIfNeeded(NULL); 890 m_thenImpl.evalSubExprsIfNeeded(NULL); 891 m_elseImpl.evalSubExprsIfNeeded(NULL); 892 return true; 893 } 894 895 #ifdef EIGEN_USE_THREADS 896 template <typename EvalSubExprsCallback> 897 EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync( 898 EvaluatorPointerType, EvalSubExprsCallback done) { 899 m_condImpl.evalSubExprsIfNeeded(nullptr, [this, done](bool) { 900 m_thenImpl.evalSubExprsIfNeeded(nullptr, [this, done](bool) { 901 m_elseImpl.evalSubExprsIfNeeded(nullptr, [done](bool) { done(true); }); 902 }); 903 }); 904 } 905 #endif // EIGEN_USE_THREADS 906 907 EIGEN_STRONG_INLINE void cleanup() { 908 m_condImpl.cleanup(); 909 m_thenImpl.cleanup(); 910 m_elseImpl.cleanup(); 911 } 912 913 EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const 914 { 915 return m_condImpl.coeff(index) ? m_thenImpl.coeff(index) : m_elseImpl.coeff(index); 916 } 917 template<int LoadMode> 918 EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const 919 { 920 internal::Selector<PacketSize> select; 921 EIGEN_UNROLL_LOOP 922 for (Index i = 0; i < PacketSize; ++i) { 923 select.select[i] = m_condImpl.coeff(index+i); 924 } 925 return internal::pblend(select, 926 m_thenImpl.template packet<LoadMode>(index), 927 m_elseImpl.template packet<LoadMode>(index)); 928 929 } 930 931 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost 932 costPerCoeff(bool vectorized) const { 933 return m_condImpl.costPerCoeff(vectorized) + 934 m_thenImpl.costPerCoeff(vectorized) 935 .cwiseMax(m_elseImpl.costPerCoeff(vectorized)); 936 } 937 938 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 939 internal::TensorBlockResourceRequirements getResourceRequirements() const { 940 auto then_req = m_thenImpl.getResourceRequirements(); 941 auto else_req = m_elseImpl.getResourceRequirements(); 942 943 auto merged_req = 944 internal::TensorBlockResourceRequirements::merge(then_req, else_req); 945 merged_req.cost_per_coeff = 946 then_req.cost_per_coeff.cwiseMax(else_req.cost_per_coeff); 947 948 return internal::TensorBlockResourceRequirements::merge( 949 m_condImpl.getResourceRequirements(), merged_req); 950 } 951 952 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock 953 block(TensorBlockDesc& desc, TensorBlockScratch& scratch, 954 bool /*root_of_expr_ast*/ = false) const { 955 // It's unsafe to pass destination buffer to underlying expressions, because 956 // output might be aliased with one of the inputs. 957 desc.DropDestinationBuffer(); 958 959 return TensorBlock( 960 m_condImpl.block(desc, scratch), m_thenImpl.block(desc, scratch), 961 m_elseImpl.block(desc, scratch), TensorSelectOpBlockFactory()); 962 } 963 964 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return NULL; } 965 966 #ifdef EIGEN_USE_SYCL 967 // binding placeholder accessors to a command group handler for SYCL 968 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { 969 m_condImpl.bind(cgh); 970 m_thenImpl.bind(cgh); 971 m_elseImpl.bind(cgh); 972 } 973 #endif 974 private: 975 TensorEvaluator<IfArgType, Device> m_condImpl; 976 TensorEvaluator<ThenArgType, Device> m_thenImpl; 977 TensorEvaluator<ElseArgType, Device> m_elseImpl; 978 }; 979 980 981 } // end namespace Eigen 982 983 #endif // EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H