cart-elc

Source code for CART-ELC
git clone git://git.laack.co/cart-elc.git
Log | Files | Refs | README | LICENSE

PacketMath.h (27786B)


      1 // This file is part of Eigen, a lightweight C++ template library
      2 // for linear algebra.
      3 //
      4 // Mehdi Goli    Codeplay Software Ltd.
      5 // Ralph Potter  Codeplay Software Ltd.
      6 // Luke Iwanski  Codeplay Software Ltd.
      7 // Contact: <eigen@codeplay.com>
      8 //
      9 // This Source Code Form is subject to the terms of the Mozilla
     10 // Public License v. 2.0. If a copy of the MPL was not distributed
     11 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
     12 
     13 /*****************************************************************
     14  * PacketMath.h
     15  *
     16  * \brief:
     17  *  PacketMath
     18  *
     19  *****************************************************************/
     20 
     21 #ifndef EIGEN_PACKET_MATH_SYCL_H
     22 #define EIGEN_PACKET_MATH_SYCL_H
     23 #include <type_traits>
     24 namespace Eigen {
     25 
     26 namespace internal {
     27 #ifdef SYCL_DEVICE_ONLY
     28 
     29 #define SYCL_PLOADT_RO(address_space_target)                                 \
     30   template <typename packet_type, int Alignment>                             \
     31   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt_ro(               \
     32       typename cl::sycl::multi_ptr<                                          \
     33           const typename unpacket_traits<packet_type>::type,                 \
     34           cl::sycl::access::address_space::address_space_target>::pointer_t  \
     35           from) {                                                            \
     36     typedef typename unpacket_traits<packet_type>::type scalar;              \
     37     typedef cl::sycl::multi_ptr<                                             \
     38         scalar, cl::sycl::access::address_space::address_space_target>       \
     39         multi_ptr;                                                           \
     40     auto res = packet_type(                                                  \
     41         static_cast<typename unpacket_traits<packet_type>::type>(0));        \
     42     res.load(0, multi_ptr(const_cast<typename multi_ptr::pointer_t>(from))); \
     43     return res;                                                              \
     44   }
     45 
     46 SYCL_PLOADT_RO(global_space)
     47 SYCL_PLOADT_RO(local_space)
     48 #undef SYCL_PLOADT_RO
     49 #endif
     50 
     51 template <typename packet_type, int Alignment, typename T>
     52 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type
     53 ploadt_ro(const Eigen::TensorSycl::internal::RangeAccess<
     54           cl::sycl::access::mode::read_write, T>& from) {
     55   return ploadt_ro<packet_type, Alignment>(from.get_pointer());
     56 }
     57 
     58 #ifdef SYCL_DEVICE_ONLY
     59 #define SYCL_PLOAD(address_space_target, Alignment, AlignedType)            \
     60   template <typename packet_type>                                           \
     61   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType(     \
     62       typename cl::sycl::multi_ptr<                                         \
     63           const typename unpacket_traits<packet_type>::type,                \
     64           cl::sycl::access::address_space::address_space_target>::pointer_t \
     65           from) {                                                           \
     66     return ploadt_ro<packet_type, Alignment>(from);                         \
     67   }
     68 
     69 // global space
     70 SYCL_PLOAD(global_space, Unaligned, u)
     71 SYCL_PLOAD(global_space, Aligned, )
     72 // local space
     73 SYCL_PLOAD(local_space, Unaligned, u)
     74 SYCL_PLOAD(local_space, Aligned, )
     75 
     76 #undef SYCL_PLOAD
     77 #endif
     78 
     79 #define SYCL_PLOAD(Alignment, AlignedType)                              \
     80   template <typename packet_type>                                       \
     81   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType( \
     82       const Eigen::TensorSycl::internal::RangeAccess<                   \
     83           cl::sycl::access::mode::read_write,                           \
     84           typename unpacket_traits<packet_type>::type>                  \
     85           from) {                                                       \
     86     return ploadt_ro<packet_type, Alignment>(from);                     \
     87   }
     88 SYCL_PLOAD(Unaligned, u)
     89 SYCL_PLOAD(Aligned, )
     90 #undef SYCL_PLOAD
     91 
     92 #ifdef SYCL_DEVICE_ONLY
     93 /** \internal \returns a packet version of \a *from.
     94  * The pointer \a from must be aligned on a \a Alignment bytes boundary. */
     95 #define SYCL_PLOADT(address_space_target)                                   \
     96   template <typename packet_type, int Alignment>                            \
     97   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt(                 \
     98       typename cl::sycl::multi_ptr<                                         \
     99           const typename unpacket_traits<packet_type>::type,                \
    100           cl::sycl::access::address_space::address_space_target>::pointer_t \
    101           from) {                                                           \
    102     if (Alignment >= unpacket_traits<packet_type>::alignment)               \
    103       return pload<packet_type>(from);                                      \
    104     else                                                                    \
    105       return ploadu<packet_type>(from);                                     \
    106   }
    107 
    108 // global space
    109 SYCL_PLOADT(global_space)
    110 // local space
    111 SYCL_PLOADT(local_space)
    112 #undef SYCL_PLOADT
    113 #endif
    114 
    115 template <typename packet_type, int Alignment>
    116 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type
    117 ploadt(const Eigen::TensorSycl::internal::RangeAccess<
    118        cl::sycl::access::mode::read_write,
    119        typename unpacket_traits<packet_type>::type>& from) {
    120   return ploadt<packet_type, Alignment>(from.get_pointer());
    121 }
    122 #ifdef SYCL_DEVICE_ONLY
    123 
    124 // private_space
    125 #define SYCL_PLOADT_RO_SPECIAL(packet_type, Alignment)                 \
    126   template <>                                                          \
    127   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type                    \
    128   ploadt_ro<packet_type, Alignment>(                                   \
    129       const typename unpacket_traits<packet_type>::type* from) {       \
    130     typedef typename unpacket_traits<packet_type>::type scalar;        \
    131     auto res = packet_type(static_cast<scalar>(0));                    \
    132     res.template load<cl::sycl::access::address_space::private_space>( \
    133         0, const_cast<scalar*>(from));                                 \
    134     return res;                                                        \
    135   }
    136 
    137 SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Aligned)
    138 SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Aligned)
    139 SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Unaligned)
    140 SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Unaligned)
    141 
    142 #define SYCL_PLOAD_SPECIAL(packet_type, alignment_type)                    \
    143   template <>                                                              \
    144   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##alignment_type( \
    145       const typename unpacket_traits<packet_type>::type* from) {           \
    146     typedef typename unpacket_traits<packet_type>::type scalar;            \
    147     auto res = packet_type(static_cast<scalar>(0));                        \
    148     res.template load<cl::sycl::access::address_space::private_space>(     \
    149         0, const_cast<scalar*>(from));                                     \
    150     return res;                                                            \
    151   }
    152 SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, )
    153 SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, )
    154 SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, u)
    155 SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, u)
    156 
    157 #undef SYCL_PLOAD_SPECIAL
    158 
    159 #define SYCL_PSTORE(scalar, packet_type, address_space_target, alignment)   \
    160   template <>                                                               \
    161   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment(             \
    162       typename cl::sycl::multi_ptr<                                         \
    163           scalar,                                                           \
    164           cl::sycl::access::address_space::address_space_target>::pointer_t \
    165           to,                                                               \
    166       const packet_type& from) {                                            \
    167     typedef cl::sycl::multi_ptr<                                            \
    168         scalar, cl::sycl::access::address_space::address_space_target>      \
    169         multi_ptr;                                                          \
    170     from.store(0, multi_ptr(to));                                           \
    171   }
    172 
    173 // global space
    174 SYCL_PSTORE(float, cl::sycl::cl_float4, global_space, )
    175 SYCL_PSTORE(float, cl::sycl::cl_float4, global_space, u)
    176 SYCL_PSTORE(double, cl::sycl::cl_double2, global_space, )
    177 SYCL_PSTORE(double, cl::sycl::cl_double2, global_space, u)
    178 SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, )
    179 SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, u)
    180 SYCL_PSTORE(double, cl::sycl::cl_double2, local_space, )
    181 SYCL_PSTORE(double, cl::sycl::cl_double2, local_space, u)
    182 
    183 SYCL_PSTORE(float, cl::sycl::cl_float4, private_space, )
    184 SYCL_PSTORE(float, cl::sycl::cl_float4, private_space, u)
    185 SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, )
    186 SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, u)
    187 #undef SYCL_PSTORE
    188 
    189 #define SYCL_PSTORE_T(address_space_target)                                 \
    190   template <typename scalar, typename packet_type, int Alignment>           \
    191   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(                       \
    192       typename cl::sycl::multi_ptr<                                         \
    193           scalar,                                                           \
    194           cl::sycl::access::address_space::address_space_target>::pointer_t \
    195           to,                                                               \
    196       const packet_type& from) {                                            \
    197     if (Alignment)                                                          \
    198       pstore(to, from);                                                     \
    199     else                                                                    \
    200       pstoreu(to, from);                                                    \
    201   }
    202 
    203 SYCL_PSTORE_T(global_space)
    204 
    205 SYCL_PSTORE_T(local_space)
    206 
    207 #undef SYCL_PSTORE_T
    208 
    209 #define SYCL_PSET1(packet_type)                                         \
    210   template <>                                                           \
    211   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pset1<packet_type>( \
    212       const typename unpacket_traits<packet_type>::type& from) {        \
    213     return packet_type(from);                                           \
    214   }
    215 
    216 // global space
    217 SYCL_PSET1(cl::sycl::cl_float4)
    218 SYCL_PSET1(cl::sycl::cl_double2)
    219 
    220 #undef SYCL_PSET1
    221 
    222 template <typename packet_type>
    223 struct get_base_packet {
    224   template <typename sycl_multi_pointer>
    225   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type
    226   get_ploaddup(sycl_multi_pointer) {}
    227 
    228   template <typename sycl_multi_pointer>
    229   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type
    230   get_pgather(sycl_multi_pointer, Index) {}
    231 };
    232 
    233 template <>
    234 struct get_base_packet<cl::sycl::cl_float4> {
    235   template <typename sycl_multi_pointer>
    236   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_ploaddup(
    237       sycl_multi_pointer from) {
    238     return cl::sycl::cl_float4(from[0], from[0], from[1], from[1]);
    239   }
    240   template <typename sycl_multi_pointer>
    241   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_pgather(
    242       sycl_multi_pointer from, Index stride) {
    243     return cl::sycl::cl_float4(from[0 * stride], from[1 * stride],
    244                                from[2 * stride], from[3 * stride]);
    245   }
    246 
    247   template <typename sycl_multi_pointer>
    248   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(
    249       sycl_multi_pointer to, const cl::sycl::cl_float4& from, Index stride) {
    250     auto tmp = stride;
    251     to[0] = from.x();
    252     to[tmp] = from.y();
    253     to[tmp += stride] = from.z();
    254     to[tmp += stride] = from.w();
    255   }
    256   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 set_plset(
    257       const float& a) {
    258     return cl::sycl::cl_float4(static_cast<float>(a), static_cast<float>(a + 1),
    259                                static_cast<float>(a + 2),
    260                                static_cast<float>(a + 3));
    261   }
    262 };
    263 
    264 template <>
    265 struct get_base_packet<cl::sycl::cl_double2> {
    266   template <typename sycl_multi_pointer>
    267   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2
    268   get_ploaddup(const sycl_multi_pointer from) {
    269     return cl::sycl::cl_double2(from[0], from[0]);
    270   }
    271 
    272   template <typename sycl_multi_pointer, typename Index>
    273   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_pgather(
    274       const sycl_multi_pointer from, Index stride) {
    275     return cl::sycl::cl_double2(from[0 * stride], from[1 * stride]);
    276   }
    277 
    278   template <typename sycl_multi_pointer>
    279   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(
    280       sycl_multi_pointer to, const cl::sycl::cl_double2& from, Index stride) {
    281     to[0] = from.x();
    282     to[stride] = from.y();
    283   }
    284 
    285   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 set_plset(
    286       const double& a) {
    287     return cl::sycl::cl_double2(static_cast<double>(a),
    288                                 static_cast<double>(a + 1));
    289   }
    290 };
    291 
    292 #define SYCL_PLOAD_DUP(address_space_target)                                \
    293   template <typename packet_type>                                           \
    294   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup(               \
    295       typename cl::sycl::multi_ptr<                                         \
    296           const typename unpacket_traits<packet_type>::type,                \
    297           cl::sycl::access::address_space::address_space_target>::pointer_t \
    298           from) {                                                           \
    299     return get_base_packet<packet_type>::get_ploaddup(from);                \
    300   }
    301 
    302 // global space
    303 SYCL_PLOAD_DUP(global_space)
    304 // local_space
    305 SYCL_PLOAD_DUP(local_space)
    306 #undef SYCL_PLOAD_DUP
    307 
    308 #define SYCL_PLOAD_DUP_SPECILIZE(packet_type)                              \
    309   template <>                                                              \
    310   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup<packet_type>( \
    311       const typename unpacket_traits<packet_type>::type* from) {           \
    312     return get_base_packet<packet_type>::get_ploaddup(from);               \
    313   }
    314 
    315 SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_float4)
    316 SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_double2)
    317 
    318 #undef SYCL_PLOAD_DUP_SPECILIZE
    319 
    320 #define SYCL_PLSET(packet_type)                                         \
    321   template <>                                                           \
    322   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type plset<packet_type>( \
    323       const typename unpacket_traits<packet_type>::type& a) {           \
    324     return get_base_packet<packet_type>::set_plset(a);                  \
    325   }
    326 
    327 SYCL_PLSET(cl::sycl::cl_float4)
    328 SYCL_PLSET(cl::sycl::cl_double2)
    329 
    330 #undef SYCL_PLSET
    331 
    332 #define SYCL_PGATHER(address_space_target)                                  \
    333   template <typename Scalar, typename packet_type>                          \
    334   EIGEN_DEVICE_FUNC inline packet_type pgather(                             \
    335       typename cl::sycl::multi_ptr<                                         \
    336           const typename unpacket_traits<packet_type>::type,                \
    337           cl::sycl::access::address_space::address_space_target>::pointer_t \
    338           from,                                                             \
    339       Index stride) {                                                       \
    340     return get_base_packet<packet_type>::get_pgather(from, stride);         \
    341   }
    342 
    343 // global space
    344 SYCL_PGATHER(global_space)
    345 // local space
    346 SYCL_PGATHER(local_space)
    347 
    348 #undef SYCL_PGATHER
    349 
    350 #define SYCL_PGATHER_SPECILIZE(scalar, packet_type)                            \
    351   template <>                                                                  \
    352   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type                            \
    353   pgather<scalar, packet_type>(                                                \
    354       const typename unpacket_traits<packet_type>::type* from, Index stride) { \
    355     return get_base_packet<packet_type>::get_pgather(from, stride);            \
    356   }
    357 
    358 SYCL_PGATHER_SPECILIZE(float, cl::sycl::cl_float4)
    359 SYCL_PGATHER_SPECILIZE(double, cl::sycl::cl_double2)
    360 
    361 #undef SYCL_PGATHER_SPECILIZE
    362 
    363 #define SYCL_PSCATTER(address_space_target)                                 \
    364   template <typename Scalar, typename packet_type>                          \
    365   EIGEN_DEVICE_FUNC inline void pscatter(                                   \
    366       typename cl::sycl::multi_ptr<                                         \
    367           typename unpacket_traits<packet_type>::type,                      \
    368           cl::sycl::access::address_space::address_space_target>::pointer_t \
    369           to,                                                               \
    370       const packet_type& from, Index stride) {                              \
    371     get_base_packet<packet_type>::set_pscatter(to, from, stride);           \
    372   }
    373 
    374 // global space
    375 SYCL_PSCATTER(global_space)
    376 // local space
    377 SYCL_PSCATTER(local_space)
    378 
    379 #undef SYCL_PSCATTER
    380 
    381 #define SYCL_PSCATTER_SPECILIZE(scalar, packet_type)                        \
    382   template <>                                                               \
    383   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<scalar, packet_type>( \
    384       typename unpacket_traits<packet_type>::type * to,                     \
    385       const packet_type& from, Index stride) {                              \
    386     get_base_packet<packet_type>::set_pscatter(to, from, stride);           \
    387   }
    388 
    389 SYCL_PSCATTER_SPECILIZE(float, cl::sycl::cl_float4)
    390 SYCL_PSCATTER_SPECILIZE(double, cl::sycl::cl_double2)
    391 
    392 #undef SYCL_PSCATTER_SPECILIZE
    393 
    394 #define SYCL_PMAD(packet_type)                                            \
    395   template <>                                                             \
    396   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pmadd(                \
    397       const packet_type& a, const packet_type& b, const packet_type& c) { \
    398     return cl::sycl::mad(a, b, c);                                        \
    399   }
    400 
    401 SYCL_PMAD(cl::sycl::cl_float4)
    402 SYCL_PMAD(cl::sycl::cl_double2)
    403 #undef SYCL_PMAD
    404 
    405 template <>
    406 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float pfirst<cl::sycl::cl_float4>(
    407     const cl::sycl::cl_float4& a) {
    408   return a.x();
    409 }
    410 template <>
    411 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double pfirst<cl::sycl::cl_double2>(
    412     const cl::sycl::cl_double2& a) {
    413   return a.x();
    414 }
    415 
    416 template <>
    417 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux<cl::sycl::cl_float4>(
    418     const cl::sycl::cl_float4& a) {
    419   return a.x() + a.y() + a.z() + a.w();
    420 }
    421 
    422 template <>
    423 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux<cl::sycl::cl_double2>(
    424     const cl::sycl::cl_double2& a) {
    425   return a.x() + a.y();
    426 }
    427 
    428 template <>
    429 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_max<cl::sycl::cl_float4>(
    430     const cl::sycl::cl_float4& a) {
    431   return cl::sycl::fmax(cl::sycl::fmax(a.x(), a.y()),
    432                         cl::sycl::fmax(a.z(), a.w()));
    433 }
    434 template <>
    435 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_max<cl::sycl::cl_double2>(
    436     const cl::sycl::cl_double2& a) {
    437   return cl::sycl::fmax(a.x(), a.y());
    438 }
    439 
    440 template <>
    441 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_min<cl::sycl::cl_float4>(
    442     const cl::sycl::cl_float4& a) {
    443   return cl::sycl::fmin(cl::sycl::fmin(a.x(), a.y()),
    444                         cl::sycl::fmin(a.z(), a.w()));
    445 }
    446 template <>
    447 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_min<cl::sycl::cl_double2>(
    448     const cl::sycl::cl_double2& a) {
    449   return cl::sycl::fmin(a.x(), a.y());
    450 }
    451 
    452 template <>
    453 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_mul<cl::sycl::cl_float4>(
    454     const cl::sycl::cl_float4& a) {
    455   return a.x() * a.y() * a.z() * a.w();
    456 }
    457 template <>
    458 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_mul<cl::sycl::cl_double2>(
    459     const cl::sycl::cl_double2& a) {
    460   return a.x() * a.y();
    461 }
    462 
    463 template <>
    464 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4
    465 pabs<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
    466   return cl::sycl::cl_float4(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()),
    467                              cl::sycl::fabs(a.z()), cl::sycl::fabs(a.w()));
    468 }
    469 template <>
    470 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2
    471 pabs<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
    472   return cl::sycl::cl_double2(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()));
    473 }
    474 
    475 template <typename Packet>
    476 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_le(const Packet &a,
    477                                                           const Packet &b) {
    478   return ((a <= b)
    479               .template convert<typename unpacket_traits<Packet>::type,
    480                                 cl::sycl::rounding_mode::automatic>());
    481 }
    482 
    483 template <typename Packet>
    484 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_lt(const Packet &a,
    485                                                           const Packet &b) {
    486   return ((a < b)
    487               .template convert<typename unpacket_traits<Packet>::type,
    488                                 cl::sycl::rounding_mode::automatic>());
    489 }
    490 
    491 template <typename Packet>
    492 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_eq(const Packet &a,
    493                                                           const Packet &b) {
    494   return ((a == b)
    495               .template convert<typename unpacket_traits<Packet>::type,
    496                                 cl::sycl::rounding_mode::automatic>());
    497 }
    498 
    499 #define SYCL_PCMP(OP, TYPE)                                                    \
    500   template <>                                                                  \
    501   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE TYPE pcmp_##OP<TYPE>(const TYPE &a,    \
    502                                                              const TYPE &b) {  \
    503     return sycl_pcmp_##OP<TYPE>(a, b);                                         \
    504   }
    505 
    506 SYCL_PCMP(le, cl::sycl::cl_float4)
    507 SYCL_PCMP(lt, cl::sycl::cl_float4)
    508 SYCL_PCMP(eq, cl::sycl::cl_float4)
    509 SYCL_PCMP(le, cl::sycl::cl_double2)
    510 SYCL_PCMP(lt, cl::sycl::cl_double2)
    511 SYCL_PCMP(eq, cl::sycl::cl_double2)
    512 #undef SYCL_PCMP
    513 
    514 template <typename T> struct convert_to_integer;
    515 
    516 template <> struct convert_to_integer<float> {
    517   using type = std::int32_t;
    518   using packet_type = cl::sycl::cl_int4;
    519 };
    520 template <> struct convert_to_integer<double> {
    521   using type = std::int64_t;
    522   using packet_type = cl::sycl::cl_long2;
    523 };
    524 
    525 template <typename PacketIn>
    526 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename convert_to_integer<
    527     typename unpacket_traits<PacketIn>::type>::packet_type
    528 vector_as_int(const PacketIn &p) {
    529   return (
    530       p.template convert<typename convert_to_integer<
    531                              typename unpacket_traits<PacketIn>::type>::type,
    532                          cl::sycl::rounding_mode::automatic>());
    533 }
    534 
    535 template <typename packetOut, typename PacketIn>
    536 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packetOut
    537 convert_vector(const PacketIn &p) {
    538   return (p.template convert<typename unpacket_traits<packetOut>::type,
    539                              cl::sycl::rounding_mode::automatic>());
    540 }
    541 
    542 #define SYCL_PAND(TYPE)                                                        \
    543   template <>                                                                  \
    544   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pand<TYPE>(const TYPE &a,         \
    545                                                         const TYPE &b) {       \
    546     return convert_vector<TYPE>(vector_as_int(a) & vector_as_int(b));          \
    547   }
    548 SYCL_PAND(cl::sycl::cl_float4)
    549 SYCL_PAND(cl::sycl::cl_double2)
    550 #undef SYCL_PAND
    551 
    552 #define SYCL_POR(TYPE)                                                         \
    553   template <>                                                                  \
    554   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE por<TYPE>(const TYPE &a,          \
    555                                                        const TYPE &b) {        \
    556     return convert_vector<TYPE>(vector_as_int(a) | vector_as_int(b));          \
    557   }
    558 
    559 SYCL_POR(cl::sycl::cl_float4)
    560 SYCL_POR(cl::sycl::cl_double2)
    561 #undef SYCL_POR
    562 
    563 #define SYCL_PXOR(TYPE)                                                        \
    564   template <>                                                                  \
    565   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pxor<TYPE>(const TYPE &a,         \
    566                                                         const TYPE &b) {       \
    567     return convert_vector<TYPE>(vector_as_int(a) ^ vector_as_int(b));          \
    568   }
    569 
    570 SYCL_PXOR(cl::sycl::cl_float4)
    571 SYCL_PXOR(cl::sycl::cl_double2)
    572 #undef SYCL_PXOR
    573 
    574 #define SYCL_PANDNOT(TYPE)                                                     \
    575   template <>                                                                  \
    576   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pandnot<TYPE>(const TYPE &a,      \
    577                                                            const TYPE &b) {    \
    578     return convert_vector<TYPE>(vector_as_int(a) & (~vector_as_int(b)));       \
    579   }
    580 SYCL_PANDNOT(cl::sycl::cl_float4)
    581 SYCL_PANDNOT(cl::sycl::cl_double2)
    582 #undef SYCL_PANDNOT
    583 
    584 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(
    585     PacketBlock<cl::sycl::cl_float4, 4>& kernel) {
    586   float tmp = kernel.packet[0].y();
    587   kernel.packet[0].y() = kernel.packet[1].x();
    588   kernel.packet[1].x() = tmp;
    589 
    590   tmp = kernel.packet[0].z();
    591   kernel.packet[0].z() = kernel.packet[2].x();
    592   kernel.packet[2].x() = tmp;
    593 
    594   tmp = kernel.packet[0].w();
    595   kernel.packet[0].w() = kernel.packet[3].x();
    596   kernel.packet[3].x() = tmp;
    597 
    598   tmp = kernel.packet[1].z();
    599   kernel.packet[1].z() = kernel.packet[2].y();
    600   kernel.packet[2].y() = tmp;
    601 
    602   tmp = kernel.packet[1].w();
    603   kernel.packet[1].w() = kernel.packet[3].y();
    604   kernel.packet[3].y() = tmp;
    605 
    606   tmp = kernel.packet[2].w();
    607   kernel.packet[2].w() = kernel.packet[3].z();
    608   kernel.packet[3].z() = tmp;
    609 }
    610 
    611 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(
    612     PacketBlock<cl::sycl::cl_double2, 2>& kernel) {
    613   double tmp = kernel.packet[0].y();
    614   kernel.packet[0].y() = kernel.packet[1].x();
    615   kernel.packet[1].x() = tmp;
    616 }
    617 
    618 template <>
    619 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pblend(
    620     const Selector<unpacket_traits<cl::sycl::cl_float4>::size>& ifPacket,
    621     const cl::sycl::cl_float4& thenPacket,
    622     const cl::sycl::cl_float4& elsePacket) {
    623   cl::sycl::cl_int4 condition(
    624       ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1,
    625       ifPacket.select[2] ? 0 : -1, ifPacket.select[3] ? 0 : -1);
    626   return cl::sycl::select(thenPacket, elsePacket, condition);
    627 }
    628 
    629 template <>
    630 inline cl::sycl::cl_double2 pblend(
    631     const Selector<unpacket_traits<cl::sycl::cl_double2>::size>& ifPacket,
    632     const cl::sycl::cl_double2& thenPacket,
    633     const cl::sycl::cl_double2& elsePacket) {
    634   cl::sycl::cl_long2 condition(ifPacket.select[0] ? 0 : -1,
    635                                ifPacket.select[1] ? 0 : -1);
    636   return cl::sycl::select(thenPacket, elsePacket, condition);
    637 }
    638 #endif  // SYCL_DEVICE_ONLY
    639 
    640 #define SYCL_PSTORE(alignment)                                  \
    641   template <typename packet_type>                               \
    642   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \
    643       const Eigen::TensorSycl::internal::RangeAccess<           \
    644           cl::sycl::access::mode::read_write,                   \
    645           typename unpacket_traits<packet_type>::type>& to,     \
    646       const packet_type& from) {                                \
    647     pstore##alignment(to.get_pointer(), from);                  \
    648   }
    649 
    650 // global space
    651 SYCL_PSTORE()
    652 SYCL_PSTORE(u)
    653 
    654 #undef SYCL_PSTORE
    655 
    656 template <typename scalar, typename packet_type, int Alignment>
    657 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(
    658     Eigen::TensorSycl::internal::RangeAccess<
    659         cl::sycl::access::mode::read_write,
    660         typename unpacket_traits<packet_type>::type>
    661         to,
    662     const packet_type& from) {
    663   pstoret<scalar, packet_type, Alignment>(to.get_pointer(), from);
    664 }
    665 
    666 }  // end namespace internal
    667 
    668 }  // end namespace Eigen
    669 
    670 #endif  // EIGEN_PACKET_MATH_SYCL_H