GenericPacketMath.h - cart-elc - Source code for CART-ELC

GenericPacketMath.h (38812B)
      1 // This file is part of Eigen, a lightweight C++ template library
      2 // for linear algebra.
      3 //
      4 // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
      5 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
      6 //
      7 // This Source Code Form is subject to the terms of the Mozilla
      8 // Public License v. 2.0. If a copy of the MPL was not distributed
      9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
     10 
     11 #ifndef EIGEN_GENERIC_PACKET_MATH_H
     12 #define EIGEN_GENERIC_PACKET_MATH_H
     13 
     14 namespace Eigen {
     15 
     16 namespace internal {
     17 
     18 /** \internal
     19   * \file GenericPacketMath.h
     20   *
     21   * Default implementation for types not supported by the vectorization.
     22   * In practice these functions are provided to make easier the writing
     23   * of generic vectorized code.
     24   */
     25 
     26 #ifndef EIGEN_DEBUG_ALIGNED_LOAD
     27 #define EIGEN_DEBUG_ALIGNED_LOAD
     28 #endif
     29 
     30 #ifndef EIGEN_DEBUG_UNALIGNED_LOAD
     31 #define EIGEN_DEBUG_UNALIGNED_LOAD
     32 #endif
     33 
     34 #ifndef EIGEN_DEBUG_ALIGNED_STORE
     35 #define EIGEN_DEBUG_ALIGNED_STORE
     36 #endif
     37 
     38 #ifndef EIGEN_DEBUG_UNALIGNED_STORE
     39 #define EIGEN_DEBUG_UNALIGNED_STORE
     40 #endif
     41 
     42 struct default_packet_traits
     43 {
     44   enum {
     45     HasHalfPacket = 0,
     46 
     47     HasAdd       = 1,
     48     HasSub       = 1,
     49     HasShift     = 1,
     50     HasMul       = 1,
     51     HasNegate    = 1,
     52     HasAbs       = 1,
     53     HasArg       = 0,
     54     HasAbs2      = 1,
     55     HasAbsDiff   = 0,
     56     HasMin       = 1,
     57     HasMax       = 1,
     58     HasConj      = 1,
     59     HasSetLinear = 1,
     60     HasBlend     = 0,
     61     // This flag is used to indicate whether packet comparison is supported.
     62     // pcmp_eq, pcmp_lt and pcmp_le should be defined for it to be true.
     63     HasCmp       = 0,
     64 
     65     HasDiv    = 0,
     66     HasSqrt   = 0,
     67     HasRsqrt  = 0,
     68     HasExp    = 0,
     69     HasExpm1  = 0,
     70     HasLog    = 0,
     71     HasLog1p  = 0,
     72     HasLog10  = 0,
     73     HasPow    = 0,
     74 
     75     HasSin    = 0,
     76     HasCos    = 0,
     77     HasTan    = 0,
     78     HasASin   = 0,
     79     HasACos   = 0,
     80     HasATan   = 0,
     81     HasSinh   = 0,
     82     HasCosh   = 0,
     83     HasTanh   = 0,
     84     HasLGamma = 0,
     85     HasDiGamma = 0,
     86     HasZeta = 0,
     87     HasPolygamma = 0,
     88     HasErf = 0,
     89     HasErfc = 0,
     90     HasNdtri = 0,
     91     HasBessel = 0,
     92     HasIGamma = 0,
     93     HasIGammaDerA = 0,
     94     HasGammaSampleDerAlpha = 0,
     95     HasIGammac = 0,
     96     HasBetaInc = 0,
     97 
     98     HasRound  = 0,
     99     HasRint   = 0,
    100     HasFloor  = 0,
    101     HasCeil   = 0,
    102     HasSign   = 0
    103   };
    104 };
    105 
    106 template<typename T> struct packet_traits : default_packet_traits
    107 {
    108   typedef T type;
    109   typedef T half;
    110   enum {
    111     Vectorizable = 0,
    112     size = 1,
    113     AlignedOnScalar = 0,
    114     HasHalfPacket = 0
    115   };
    116   enum {
    117     HasAdd    = 0,
    118     HasSub    = 0,
    119     HasMul    = 0,
    120     HasNegate = 0,
    121     HasAbs    = 0,
    122     HasAbs2   = 0,
    123     HasMin    = 0,
    124     HasMax    = 0,
    125     HasConj   = 0,
    126     HasSetLinear = 0
    127   };
    128 };
    129 
    130 template<typename T> struct packet_traits<const T> : packet_traits<T> { };
    131 
    132 template<typename T> struct unpacket_traits
    133 {
    134   typedef T type;
    135   typedef T half;
    136   enum
    137   {
    138     size = 1,
    139     alignment = 1,
    140     vectorizable = false,
    141     masked_load_available=false,
    142     masked_store_available=false
    143   };
    144 };
    145 
    146 template<typename T> struct unpacket_traits<const T> : unpacket_traits<T> { };
    147 
    148 template <typename Src, typename Tgt> struct type_casting_traits {
    149   enum {
    150     VectorizedCast = 0,
    151     SrcCoeffRatio = 1,
    152     TgtCoeffRatio = 1
    153   };
    154 };
    155 
    156 /** \internal Wrapper to ensure that multiple packet types can map to the same
    157     same underlying vector type. */
    158 template<typename T, int unique_id = 0>
    159 struct eigen_packet_wrapper
    160 {
    161   EIGEN_ALWAYS_INLINE operator T&() { return m_val; }
    162   EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
    163   EIGEN_ALWAYS_INLINE eigen_packet_wrapper() {}
    164   EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T &v) : m_val(v) {}
    165   EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T &v) {
    166     m_val = v;
    167     return *this;
    168   }
    169 
    170   T m_val;
    171 };
    172 
    173 
    174 /** \internal A convenience utility for determining if the type is a scalar.
    175  * This is used to enable some generic packet implementations.
    176  */
    177 template<typename Packet>
    178 struct is_scalar {
    179   typedef typename unpacket_traits<Packet>::type Scalar;
    180   enum {
    181     value = internal::is_same<Packet, Scalar>::value
    182   };
    183 };
    184 
    185 /** \internal \returns static_cast<TgtType>(a) (coeff-wise) */
    186 template <typename SrcPacket, typename TgtPacket>
    187 EIGEN_DEVICE_FUNC inline TgtPacket
    188 pcast(const SrcPacket& a) {
    189   return static_cast<TgtPacket>(a);
    190 }
    191 template <typename SrcPacket, typename TgtPacket>
    192 EIGEN_DEVICE_FUNC inline TgtPacket
    193 pcast(const SrcPacket& a, const SrcPacket& /*b*/) {
    194   return static_cast<TgtPacket>(a);
    195 }
    196 template <typename SrcPacket, typename TgtPacket>
    197 EIGEN_DEVICE_FUNC inline TgtPacket
    198 pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/) {
    199   return static_cast<TgtPacket>(a);
    200 }
    201 template <typename SrcPacket, typename TgtPacket>
    202 EIGEN_DEVICE_FUNC inline TgtPacket
    203 pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/,
    204       const SrcPacket& /*e*/, const SrcPacket& /*f*/, const SrcPacket& /*g*/, const SrcPacket& /*h*/) {
    205   return static_cast<TgtPacket>(a);
    206 }
    207 
    208 /** \internal \returns reinterpret_cast<Target>(a) */
    209 template <typename Target, typename Packet>
    210 EIGEN_DEVICE_FUNC inline Target
    211 preinterpret(const Packet& a); /* { return reinterpret_cast<const Target&>(a); } */
    212 
    213 /** \internal \returns a + b (coeff-wise) */
    214 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    215 padd(const Packet& a, const Packet& b) { return a+b; }
    216 // Avoid compiler warning for boolean algebra.
    217 template<> EIGEN_DEVICE_FUNC inline bool
    218 padd(const bool& a, const bool& b) { return a || b; }
    219 
    220 /** \internal \returns a - b (coeff-wise) */
    221 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    222 psub(const Packet& a, const Packet& b) { return a-b; }
    223 
    224 /** \internal \returns -a (coeff-wise) */
    225 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    226 pnegate(const Packet& a) { return -a; }
    227 
    228 template<> EIGEN_DEVICE_FUNC inline bool
    229 pnegate(const bool& a) { return !a; }
    230 
    231 /** \internal \returns conj(a) (coeff-wise) */
    232 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    233 pconj(const Packet& a) { return numext::conj(a); }
    234 
    235 /** \internal \returns a * b (coeff-wise) */
    236 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    237 pmul(const Packet& a, const Packet& b) { return a*b; }
    238 // Avoid compiler warning for boolean algebra.
    239 template<> EIGEN_DEVICE_FUNC inline bool
    240 pmul(const bool& a, const bool& b) { return a && b; }
    241 
    242 /** \internal \returns a / b (coeff-wise) */
    243 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    244 pdiv(const Packet& a, const Packet& b) { return a/b; }
    245 
    246 // In the generic case, memset to all one bits.
    247 template<typename Packet, typename EnableIf = void>
    248 struct ptrue_impl {
    249   static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/){
    250     Packet b;
    251     memset(static_cast<void*>(&b), 0xff, sizeof(Packet));
    252     return b;
    253   }
    254 };
    255 
    256 // For non-trivial scalars, set to Scalar(1) (i.e. a non-zero value).
    257 // Although this is technically not a valid bitmask, the scalar path for pselect
    258 // uses a comparison to zero, so this should still work in most cases. We don't
    259 // have another option, since the scalar type requires initialization.
    260 template<typename T>
    261 struct ptrue_impl<T, 
    262     typename internal::enable_if<is_scalar<T>::value && NumTraits<T>::RequireInitialization>::type > {
    263   static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/){
    264     return T(1);
    265   }
    266 };
    267 
    268 /** \internal \returns one bits. */
    269 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    270 ptrue(const Packet& a) {
    271   return ptrue_impl<Packet>::run(a);
    272 }
    273 
    274 // In the general case, memset to zero.
    275 template<typename Packet, typename EnableIf = void>
    276 struct pzero_impl {
    277   static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
    278     Packet b;
    279     memset(static_cast<void*>(&b), 0x00, sizeof(Packet));
    280     return b;
    281   }
    282 };
    283 
    284 // For scalars, explicitly set to Scalar(0), since the underlying representation
    285 // for zero may not consist of all-zero bits.
    286 template<typename T>
    287 struct pzero_impl<T,
    288     typename internal::enable_if<is_scalar<T>::value>::type> {
    289   static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) {
    290     return T(0);
    291   }
    292 };
    293 
    294 /** \internal \returns packet of zeros */
    295 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    296 pzero(const Packet& a) {
    297   return pzero_impl<Packet>::run(a);
    298 }
    299 
    300 /** \internal \returns a <= b as a bit mask */
    301 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    302 pcmp_le(const Packet& a, const Packet& b)  { return a<=b ? ptrue(a) : pzero(a); }
    303 
    304 /** \internal \returns a < b as a bit mask */
    305 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    306 pcmp_lt(const Packet& a, const Packet& b)  { return a<b ? ptrue(a) : pzero(a); }
    307 
    308 /** \internal \returns a == b as a bit mask */
    309 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    310 pcmp_eq(const Packet& a, const Packet& b) { return a==b ? ptrue(a) : pzero(a); }
    311 
    312 /** \internal \returns a < b or a==NaN or b==NaN as a bit mask */
    313 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    314 pcmp_lt_or_nan(const Packet& a, const Packet& b) { return a>=b ? pzero(a) : ptrue(a); }
    315 
    316 template<typename T>
    317 struct bit_and {
    318   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const {
    319     return a & b;
    320   }
    321 };
    322 
    323 template<typename T>
    324 struct bit_or {
    325   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const {
    326     return a | b;
    327   }
    328 };
    329 
    330 template<typename T>
    331 struct bit_xor {
    332   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const {
    333     return a ^ b;
    334   }
    335 };
    336 
    337 template<typename T>
    338 struct bit_not {
    339   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a) const {
    340     return ~a;
    341   }
    342 };
    343 
    344 // Use operators &, |, ^, ~.
    345 template<typename T>
    346 struct operator_bitwise_helper {
    347   EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) { return bit_and<T>()(a, b); }
    348   EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { return bit_or<T>()(a, b); }
    349   EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) { return bit_xor<T>()(a, b); }
    350   EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { return bit_not<T>()(a); }
    351 };
    352 
    353 // Apply binary operations byte-by-byte
    354 template<typename T>
    355 struct bytewise_bitwise_helper {
    356   EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) {
    357     return binary(a, b, bit_and<unsigned char>());
    358   }
    359   EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { 
    360     return binary(a, b, bit_or<unsigned char>());
    361    }
    362   EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) {
    363     return binary(a, b, bit_xor<unsigned char>());
    364   }
    365   EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { 
    366     return unary(a,bit_not<unsigned char>());
    367    }
    368   
    369  private:
    370   template<typename Op>
    371   EIGEN_DEVICE_FUNC static inline T unary(const T& a, Op op) {
    372     const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
    373     T c;
    374     unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
    375     for (size_t i = 0; i < sizeof(T); ++i) {
    376       *c_ptr++ = op(*a_ptr++);
    377     }
    378     return c;
    379   }
    380 
    381   template<typename Op>
    382   EIGEN_DEVICE_FUNC static inline T binary(const T& a, const T& b, Op op) {
    383     const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
    384     const unsigned char* b_ptr = reinterpret_cast<const unsigned char*>(&b);
    385     T c;
    386     unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
    387     for (size_t i = 0; i < sizeof(T); ++i) {
    388       *c_ptr++ = op(*a_ptr++, *b_ptr++);
    389     }
    390     return c;
    391   }
    392 };
    393 
    394 // In the general case, use byte-by-byte manipulation.
    395 template<typename T, typename EnableIf = void>
    396 struct bitwise_helper : public bytewise_bitwise_helper<T> {};
    397 
    398 // For integers or non-trivial scalars, use binary operators.
    399 template<typename T>
    400 struct bitwise_helper<T,
    401   typename internal::enable_if<
    402     is_scalar<T>::value && (NumTraits<T>::IsInteger || NumTraits<T>::RequireInitialization)>::type
    403   > : public operator_bitwise_helper<T> {};
    404 
    405 /** \internal \returns the bitwise and of \a a and \a b */
    406 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    407 pand(const Packet& a, const Packet& b) {
    408   return bitwise_helper<Packet>::bitwise_and(a, b);
    409 }
    410 
    411 /** \internal \returns the bitwise or of \a a and \a b */
    412 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    413 por(const Packet& a, const Packet& b) {
    414   return bitwise_helper<Packet>::bitwise_or(a, b);
    415 }
    416 
    417 /** \internal \returns the bitwise xor of \a a and \a b */
    418 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    419 pxor(const Packet& a, const Packet& b) {
    420   return bitwise_helper<Packet>::bitwise_xor(a, b);
    421 }
    422 
    423 /** \internal \returns the bitwise not of \a a */
    424 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    425 pnot(const Packet& a) {
    426   return bitwise_helper<Packet>::bitwise_not(a);
    427 }
    428 
    429 /** \internal \returns the bitwise and of \a a and not \a b */
    430 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    431 pandnot(const Packet& a, const Packet& b) { return pand(a, pnot(b)); }
    432 
    433 // In the general case, use bitwise select.
    434 template<typename Packet, typename EnableIf = void>
    435 struct pselect_impl {
    436   static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
    437     return por(pand(a,mask),pandnot(b,mask));
    438   }
    439 };
    440 
    441 // For scalars, use ternary select.
    442 template<typename Packet>
    443 struct pselect_impl<Packet, 
    444     typename internal::enable_if<is_scalar<Packet>::value>::type > {
    445   static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
    446     return numext::equal_strict(mask, Packet(0)) ? b : a;
    447   }
    448 };
    449 
    450 /** \internal \returns \a or \b for each field in packet according to \mask */
    451 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    452 pselect(const Packet& mask, const Packet& a, const Packet& b) {
    453   return pselect_impl<Packet>::run(mask, a, b);
    454 }
    455 
    456 template<> EIGEN_DEVICE_FUNC inline bool pselect<bool>(
    457     const bool& cond, const bool& a, const bool& b) {
    458   return cond ? a : b;
    459 }
    460 
    461 /** \internal \returns the min or of \a a and \a b (coeff-wise)
    462     If either \a a or \a b are NaN, the result is implementation defined. */
    463 template<int NaNPropagation>
    464 struct pminmax_impl {
    465   template <typename Packet, typename Op>
    466   static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
    467     return op(a,b);
    468   }
    469 };
    470 
    471 /** \internal \returns the min or max of \a a and \a b (coeff-wise)
    472     If either \a a or \a b are NaN, NaN is returned. */
    473 template<>
    474 struct pminmax_impl<PropagateNaN> {
    475   template <typename Packet, typename Op>
    476   static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
    477   Packet not_nan_mask_a = pcmp_eq(a, a);
    478   Packet not_nan_mask_b = pcmp_eq(b, b);
    479   return pselect(not_nan_mask_a,
    480                  pselect(not_nan_mask_b, op(a, b), b),
    481                  a);
    482   }
    483 };
    484 
    485 /** \internal \returns the min or max of \a a and \a b (coeff-wise)
    486     If both \a a and \a b are NaN, NaN is returned.
    487     Equivalent to std::fmin(a, b).  */
    488 template<>
    489 struct pminmax_impl<PropagateNumbers> {
    490   template <typename Packet, typename Op>
    491   static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
    492   Packet not_nan_mask_a = pcmp_eq(a, a);
    493   Packet not_nan_mask_b = pcmp_eq(b, b);
    494   return pselect(not_nan_mask_a,
    495                  pselect(not_nan_mask_b, op(a, b), a),
    496                  b);
    497   }
    498 };
    499 
    500 
    501 #ifndef SYCL_DEVICE_ONLY
    502 #define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) Func
    503 #else
    504 #define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) \
    505 [](const Type& a, const Type& b) { \
    506         return Func(a, b);}
    507 #endif
    508 
    509 /** \internal \returns the min of \a a and \a b  (coeff-wise).
    510     If \a a or \b b is NaN, the return value is implementation defined. */
    511 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    512 pmin(const Packet& a, const Packet& b) { return numext::mini(a,b); }
    513 
    514 /** \internal \returns the min of \a a and \a b  (coeff-wise).
    515     NaNPropagation determines the NaN propagation semantics. */
    516 template <int NaNPropagation, typename Packet>
    517 EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {
    518   return pminmax_impl<NaNPropagation>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmin<Packet>)));
    519 }
    520 
    521 /** \internal \returns the max of \a a and \a b  (coeff-wise)
    522     If \a a or \b b is NaN, the return value is implementation defined. */
    523 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    524 pmax(const Packet& a, const Packet& b) { return numext::maxi(a, b); }
    525 
    526 /** \internal \returns the max of \a a and \a b  (coeff-wise).
    527     NaNPropagation determines the NaN propagation semantics. */
    528 template <int NaNPropagation, typename Packet>
    529 EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) {
    530   return pminmax_impl<NaNPropagation>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet,(pmax<Packet>)));
    531 }
    532 
    533 /** \internal \returns the absolute value of \a a */
    534 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    535 pabs(const Packet& a) { return numext::abs(a); }
    536 template<> EIGEN_DEVICE_FUNC inline unsigned int
    537 pabs(const unsigned int& a) { return a; }
    538 template<> EIGEN_DEVICE_FUNC inline unsigned long
    539 pabs(const unsigned long& a) { return a; }
    540 template<> EIGEN_DEVICE_FUNC inline unsigned long long
    541 pabs(const unsigned long long& a) { return a; }
    542 
    543 /** \internal \returns the addsub value of \a a,b */
    544 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    545 paddsub(const Packet& a, const Packet& b) {
    546   return pselect(peven_mask(a), padd(a, b), psub(a, b));
    547  }
    548 
    549 /** \internal \returns the phase angle of \a a */
    550 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    551 parg(const Packet& a) { using numext::arg; return arg(a); }
    552 
    553 
    554 /** \internal \returns \a a logically shifted by N bits to the right */
    555 template<int N> EIGEN_DEVICE_FUNC inline int
    556 parithmetic_shift_right(const int& a) { return a >> N; }
    557 template<int N> EIGEN_DEVICE_FUNC inline long int
    558 parithmetic_shift_right(const long int& a) { return a >> N; }
    559 
    560 /** \internal \returns \a a arithmetically shifted by N bits to the right */
    561 template<int N> EIGEN_DEVICE_FUNC inline int
    562 plogical_shift_right(const int& a) { return static_cast<int>(static_cast<unsigned int>(a) >> N); }
    563 template<int N> EIGEN_DEVICE_FUNC inline long int
    564 plogical_shift_right(const long int& a) { return static_cast<long>(static_cast<unsigned long>(a) >> N); }
    565 
    566 /** \internal \returns \a a shifted by N bits to the left */
    567 template<int N> EIGEN_DEVICE_FUNC inline int
    568 plogical_shift_left(const int& a) { return a << N; }
    569 template<int N> EIGEN_DEVICE_FUNC inline long int
    570 plogical_shift_left(const long int& a) { return a << N; }
    571 
    572 /** \internal \returns the significant and exponent of the underlying floating point numbers
    573   * See https://en.cppreference.com/w/cpp/numeric/math/frexp
    574   */
    575 template <typename Packet>
    576 EIGEN_DEVICE_FUNC inline Packet pfrexp(const Packet& a, Packet& exponent) {
    577   int exp;
    578   EIGEN_USING_STD(frexp);
    579   Packet result = static_cast<Packet>(frexp(a, &exp));
    580   exponent = static_cast<Packet>(exp);
    581   return result;
    582 }
    583 
    584 /** \internal \returns a * 2^((int)exponent)
    585   * See https://en.cppreference.com/w/cpp/numeric/math/ldexp
    586   */
    587 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    588 pldexp(const Packet &a, const Packet &exponent) {
    589   EIGEN_USING_STD(ldexp)
    590   return static_cast<Packet>(ldexp(a, static_cast<int>(exponent)));
    591 }
    592 
    593 /** \internal \returns the min of \a a and \a b  (coeff-wise) */
    594 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    595 pabsdiff(const Packet& a, const Packet& b) { return pselect(pcmp_lt(a, b), psub(b, a), psub(a, b)); }
    596 
    597 /** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
    598 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    599 pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
    600 
    601 /** \internal \returns a packet version of \a *from, (un-aligned load) */
    602 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    603 ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
    604 
    605 /** \internal \returns a packet version of \a *from, (un-aligned masked load)
    606  * There is no generic implementation. We only have implementations for specialized
    607  * cases. Generic case should not be called.
    608  */
    609 template<typename Packet> EIGEN_DEVICE_FUNC inline
    610 typename enable_if<unpacket_traits<Packet>::masked_load_available, Packet>::type
    611 ploadu(const typename unpacket_traits<Packet>::type* from, typename unpacket_traits<Packet>::mask_t umask);
    612 
    613 /** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
    614 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    615 pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
    616 
    617 /** \internal \returns a packet with constant coefficients set from bits */
    618 template<typename Packet,typename BitsType> EIGEN_DEVICE_FUNC inline Packet
    619 pset1frombits(BitsType a);
    620 
    621 /** \internal \returns a packet with constant coefficients \a a[0], e.g.: (a[0],a[0],a[0],a[0]) */
    622 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    623 pload1(const typename unpacket_traits<Packet>::type  *a) { return pset1<Packet>(*a); }
    624 
    625 /** \internal \returns a packet with elements of \a *from duplicated.
    626   * For instance, for a packet of 8 elements, 4 scalars will be read from \a *from and
    627   * duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]}
    628   * Currently, this function is only used for scalar * complex products.
    629   */
    630 template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
    631 ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
    632 
    633 /** \internal \returns a packet with elements of \a *from quadrupled.
    634   * For instance, for a packet of 8 elements, 2 scalars will be read from \a *from and
    635   * replicated to form: {from[0],from[0],from[0],from[0],from[1],from[1],from[1],from[1]}
    636   * Currently, this function is only used in matrix products.
    637   * For packet-size smaller or equal to 4, this function is equivalent to pload1
    638   */
    639 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    640 ploadquad(const typename unpacket_traits<Packet>::type* from)
    641 { return pload1<Packet>(from); }
    642 
    643 /** \internal equivalent to
    644   * \code
    645   * a0 = pload1(a+0);
    646   * a1 = pload1(a+1);
    647   * a2 = pload1(a+2);
    648   * a3 = pload1(a+3);
    649   * \endcode
    650   * \sa pset1, pload1, ploaddup, pbroadcast2
    651   */
    652 template<typename Packet> EIGEN_DEVICE_FUNC
    653 inline void pbroadcast4(const typename unpacket_traits<Packet>::type *a,
    654                         Packet& a0, Packet& a1, Packet& a2, Packet& a3)
    655 {
    656   a0 = pload1<Packet>(a+0);
    657   a1 = pload1<Packet>(a+1);
    658   a2 = pload1<Packet>(a+2);
    659   a3 = pload1<Packet>(a+3);
    660 }
    661 
    662 /** \internal equivalent to
    663   * \code
    664   * a0 = pload1(a+0);
    665   * a1 = pload1(a+1);
    666   * \endcode
    667   * \sa pset1, pload1, ploaddup, pbroadcast4
    668   */
    669 template<typename Packet> EIGEN_DEVICE_FUNC
    670 inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
    671                         Packet& a0, Packet& a1)
    672 {
    673   a0 = pload1<Packet>(a+0);
    674   a1 = pload1<Packet>(a+1);
    675 }
    676 
    677 /** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
    678 template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
    679 plset(const typename unpacket_traits<Packet>::type& a) { return a; }
    680 
    681 /** \internal \returns a packet with constant coefficients \a a, e.g.: (x, 0, x, 0),
    682      where x is the value of all 1-bits. */
    683 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    684 peven_mask(const Packet& /*a*/) {
    685   typedef typename unpacket_traits<Packet>::type Scalar;
    686   const size_t n = unpacket_traits<Packet>::size;
    687   EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
    688   for(size_t i = 0; i < n; ++i) {
    689     memset(elements+i, ((i & 1) == 0 ? 0xff : 0), sizeof(Scalar));
    690   }
    691   return ploadu<Packet>(elements);
    692 }
    693 
    694 
    695 /** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
    696 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from)
    697 { (*to) = from; }
    698 
    699 /** \internal copy the packet \a from to \a *to, (un-aligned store) */
    700 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from)
    701 {  (*to) = from; }
    702 
    703 /** \internal copy the packet \a from to \a *to, (un-aligned store with a mask)
    704  * There is no generic implementation. We only have implementations for specialized
    705  * cases. Generic case should not be called.
    706  */
    707 template<typename Scalar, typename Packet>
    708 EIGEN_DEVICE_FUNC inline
    709 typename enable_if<unpacket_traits<Packet>::masked_store_available, void>::type
    710 pstoreu(Scalar* to, const Packet& from, typename unpacket_traits<Packet>::mask_t umask);
    711 
    712  template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/)
    713  { return ploadu<Packet>(from); }
    714 
    715  template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/)
    716  { pstore(to, from); }
    717 
    718 /** \internal tries to do cache prefetching of \a addr */
    719 template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr)
    720 {
    721 #if defined(EIGEN_HIP_DEVICE_COMPILE)
    722   // do nothing
    723 #elif defined(EIGEN_CUDA_ARCH)
    724 #if defined(__LP64__) || EIGEN_OS_WIN64
    725   // 64-bit pointer operand constraint for inlined asm
    726   asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
    727 #else
    728   // 32-bit pointer operand constraint for inlined asm
    729   asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr));
    730 #endif
    731 #elif (!EIGEN_COMP_MSVC) && (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_ICC)
    732   __builtin_prefetch(addr);
    733 #endif
    734 }
    735 
    736 /** \internal \returns the reversed elements of \a a*/
    737 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)
    738 { return a; }
    739 
    740 /** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
    741 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
    742 {
    743   return Packet(numext::imag(a),numext::real(a));
    744 }
    745 
    746 /**************************
    747 * Special math functions
    748 ***************************/
    749 
    750 /** \internal \returns the sine of \a a (coeff-wise) */
    751 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    752 Packet psin(const Packet& a) { EIGEN_USING_STD(sin); return sin(a); }
    753 
    754 /** \internal \returns the cosine of \a a (coeff-wise) */
    755 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    756 Packet pcos(const Packet& a) { EIGEN_USING_STD(cos); return cos(a); }
    757 
    758 /** \internal \returns the tan of \a a (coeff-wise) */
    759 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    760 Packet ptan(const Packet& a) { EIGEN_USING_STD(tan); return tan(a); }
    761 
    762 /** \internal \returns the arc sine of \a a (coeff-wise) */
    763 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    764 Packet pasin(const Packet& a) { EIGEN_USING_STD(asin); return asin(a); }
    765 
    766 /** \internal \returns the arc cosine of \a a (coeff-wise) */
    767 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    768 Packet pacos(const Packet& a) { EIGEN_USING_STD(acos); return acos(a); }
    769 
    770 /** \internal \returns the arc tangent of \a a (coeff-wise) */
    771 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    772 Packet patan(const Packet& a) { EIGEN_USING_STD(atan); return atan(a); }
    773 
    774 /** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
    775 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    776 Packet psinh(const Packet& a) { EIGEN_USING_STD(sinh); return sinh(a); }
    777 
    778 /** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
    779 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    780 Packet pcosh(const Packet& a) { EIGEN_USING_STD(cosh); return cosh(a); }
    781 
    782 /** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
    783 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    784 Packet ptanh(const Packet& a) { EIGEN_USING_STD(tanh); return tanh(a); }
    785 
    786 /** \internal \returns the exp of \a a (coeff-wise) */
    787 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    788 Packet pexp(const Packet& a) { EIGEN_USING_STD(exp); return exp(a); }
    789 
    790 /** \internal \returns the expm1 of \a a (coeff-wise) */
    791 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    792 Packet pexpm1(const Packet& a) { return numext::expm1(a); }
    793 
    794 /** \internal \returns the log of \a a (coeff-wise) */
    795 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    796 Packet plog(const Packet& a) { EIGEN_USING_STD(log); return log(a); }
    797 
    798 /** \internal \returns the log1p of \a a (coeff-wise) */
    799 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    800 Packet plog1p(const Packet& a) { return numext::log1p(a); }
    801 
    802 /** \internal \returns the log10 of \a a (coeff-wise) */
    803 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    804 Packet plog10(const Packet& a) { EIGEN_USING_STD(log10); return log10(a); }
    805 
    806 /** \internal \returns the log10 of \a a (coeff-wise) */
    807 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    808 Packet plog2(const Packet& a) {
    809   typedef typename internal::unpacket_traits<Packet>::type Scalar;
    810   return pmul(pset1<Packet>(Scalar(EIGEN_LOG2E)), plog(a)); 
    811 }
    812 
    813 /** \internal \returns the square-root of \a a (coeff-wise) */
    814 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    815 Packet psqrt(const Packet& a) { return numext::sqrt(a); }
    816 
    817 /** \internal \returns the reciprocal square-root of \a a (coeff-wise) */
    818 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    819 Packet prsqrt(const Packet& a) {
    820   typedef typename internal::unpacket_traits<Packet>::type Scalar;
    821   return pdiv(pset1<Packet>(Scalar(1)), psqrt(a));
    822 }
    823 
    824 /** \internal \returns the rounded value of \a a (coeff-wise) */
    825 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    826 Packet pround(const Packet& a) { using numext::round; return round(a); }
    827 
    828 /** \internal \returns the floor of \a a (coeff-wise) */
    829 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    830 Packet pfloor(const Packet& a) { using numext::floor; return floor(a); }
    831 
    832 /** \internal \returns the rounded value of \a a (coeff-wise) with current
    833  * rounding mode */
    834 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    835 Packet print(const Packet& a) { using numext::rint; return rint(a); }
    836 
    837 /** \internal \returns the ceil of \a a (coeff-wise) */
    838 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    839 Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }
    840 
    841 /** \internal \returns the first element of a packet */
    842 template<typename Packet>
    843 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
    844 pfirst(const Packet& a)
    845 { return a; }
    846 
    847 /** \internal \returns the sum of the elements of upper and lower half of \a a if \a a is larger than 4.
    848   * For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7}
    849   * For packet-size smaller or equal to 4, this boils down to a noop.
    850   */
    851 template<typename Packet>
    852 EIGEN_DEVICE_FUNC inline typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
    853 predux_half_dowto4(const Packet& a)
    854 { return a; }
    855 
    856 // Slow generic implementation of Packet reduction.
    857 template <typename Packet, typename Op>
    858 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
    859 predux_helper(const Packet& a, Op op) {
    860   typedef typename unpacket_traits<Packet>::type Scalar;
    861   const size_t n = unpacket_traits<Packet>::size;
    862   EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
    863   pstoreu<Scalar>(elements, a);
    864   for(size_t k = n / 2; k > 0; k /= 2)  {
    865     for(size_t i = 0; i < k; ++i) {
    866       elements[i] = op(elements[i], elements[i + k]);
    867     }
    868   }
    869   return elements[0];
    870 }
    871 
    872 /** \internal \returns the sum of the elements of \a a*/
    873 template<typename Packet>
    874 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
    875 predux(const Packet& a)
    876 {
    877   return a;
    878 }
    879 
    880 /** \internal \returns the product of the elements of \a a */
    881 template <typename Packet>
    882 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(
    883     const Packet& a) {
    884   typedef typename unpacket_traits<Packet>::type Scalar; 
    885   return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmul<Scalar>)));
    886 }
    887 
    888 /** \internal \returns the min of the elements of \a a */
    889 template <typename Packet>
    890 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(
    891     const Packet &a) {
    892   typedef typename unpacket_traits<Packet>::type Scalar; 
    893   return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<PropagateFast, Scalar>)));
    894 }
    895 
    896 template <int NaNPropagation, typename Packet>
    897 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(
    898     const Packet& a) {
    899   typedef typename unpacket_traits<Packet>::type Scalar; 
    900   return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<NaNPropagation, Scalar>)));
    901 }
    902 
    903 /** \internal \returns the min of the elements of \a a */
    904 template <typename Packet>
    905 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(
    906     const Packet &a) {
    907   typedef typename unpacket_traits<Packet>::type Scalar; 
    908   return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<PropagateFast, Scalar>)));
    909 }
    910 
    911 template <int NaNPropagation, typename Packet>
    912 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(
    913     const Packet& a) {
    914   typedef typename unpacket_traits<Packet>::type Scalar; 
    915   return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<NaNPropagation, Scalar>)));
    916 }
    917 
    918 #undef EIGEN_BINARY_OP_NAN_PROPAGATION
    919 
    920 /** \internal \returns true if all coeffs of \a a means "true"
    921   * It is supposed to be called on values returned by pcmp_*.
    922   */
    923 // not needed yet
    924 // template<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_all(const Packet& a)
    925 // { return bool(a); }
    926 
    927 /** \internal \returns true if any coeffs of \a a means "true"
    928   * It is supposed to be called on values returned by pcmp_*.
    929   */
    930 template<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_any(const Packet& a)
    931 {
    932   // Dirty but generic implementation where "true" is assumed to be non 0 and all the sames.
    933   // It is expected that "true" is either:
    934   //  - Scalar(1)
    935   //  - bits full of ones (NaN for floats),
    936   //  - or first bit equals to 1 (1 for ints, smallest denormal for floats).
    937   // For all these cases, taking the sum is just fine, and this boils down to a no-op for scalars.
    938   typedef typename unpacket_traits<Packet>::type Scalar;
    939   return numext::not_equal_strict(predux(a), Scalar(0));
    940 }
    941 
    942 /***************************************************************************
    943 * The following functions might not have to be overwritten for vectorized types
    944 ***************************************************************************/
    945 
    946 /** \internal copy a packet with constant coefficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned */
    947 // NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type)
    948 template<typename Packet>
    949 inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a)
    950 {
    951   pstore(to, pset1<Packet>(a));
    952 }
    953 
    954 /** \internal \returns a * b + c (coeff-wise) */
    955 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
    956 pmadd(const Packet&  a,
    957          const Packet&  b,
    958          const Packet&  c)
    959 { return padd(pmul(a, b),c); }
    960 
    961 /** \internal \returns a packet version of \a *from.
    962   * The pointer \a from must be aligned on a \a Alignment bytes boundary. */
    963 template<typename Packet, int Alignment>
    964 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from)
    965 {
    966   if(Alignment >= unpacket_traits<Packet>::alignment)
    967     return pload<Packet>(from);
    968   else
    969     return ploadu<Packet>(from);
    970 }
    971 
    972 /** \internal copy the packet \a from to \a *to.
    973   * The pointer \a from must be aligned on a \a Alignment bytes boundary. */
    974 template<typename Scalar, typename Packet, int Alignment>
    975 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from)
    976 {
    977   if(Alignment >= unpacket_traits<Packet>::alignment)
    978     pstore(to, from);
    979   else
    980     pstoreu(to, from);
    981 }
    982 
    983 /** \internal \returns a packet version of \a *from.
    984   * Unlike ploadt, ploadt_ro takes advantage of the read-only memory path on the
    985   * hardware if available to speedup the loading of data that won't be modified
    986   * by the current computation.
    987   */
    988 template<typename Packet, int LoadMode>
    989 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)
    990 {
    991   return ploadt<Packet, LoadMode>(from);
    992 }
    993 
    994 /***************************************************************************
    995 * Fast complex products (GCC generates a function call which is very slow)
    996 ***************************************************************************/
    997 
    998 // Eigen+CUDA does not support complexes.
    999 #if !defined(EIGEN_GPUCC)
   1000 
   1001 template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
   1002 { return std::complex<float>(a.real()*b.real() - a.imag()*b.imag(), a.imag()*b.real() + a.real()*b.imag()); }
   1003 
   1004 template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
   1005 { return std::complex<double>(a.real()*b.real() - a.imag()*b.imag(), a.imag()*b.real() + a.real()*b.imag()); }
   1006 
   1007 #endif
   1008 
   1009 
   1010 /***************************************************************************
   1011  * PacketBlock, that is a collection of N packets where the number of words
   1012  * in the packet is a multiple of N.
   1013 ***************************************************************************/
   1014 template <typename Packet,int N=unpacket_traits<Packet>::size> struct PacketBlock {
   1015   Packet packet[N];
   1016 };
   1017 
   1018 template<typename Packet> EIGEN_DEVICE_FUNC inline void
   1019 ptranspose(PacketBlock<Packet,1>& /*kernel*/) {
   1020   // Nothing to do in the scalar case, i.e. a 1x1 matrix.
   1021 }
   1022 
   1023 /***************************************************************************
   1024  * Selector, i.e. vector of N boolean values used to select (i.e. blend)
   1025  * words from 2 packets.
   1026 ***************************************************************************/
   1027 template <size_t N> struct Selector {
   1028   bool select[N];
   1029 };
   1030 
   1031 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
   1032 pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) {
   1033   return ifPacket.select[0] ? thenPacket : elsePacket;
   1034 }
   1035 
   1036 } // end namespace internal
   1037 
   1038 } // end namespace Eigen
   1039 
   1040 #endif // EIGEN_GENERIC_PACKET_MATH_H
	cart-elc Source code for CART-ELC
	git clone git://git.laack.co/cart-elc.git
	Log \| Files \| Refs \| README \| LICENSE