cart-elc

Source code for CART-ELC
git clone git://git.laack.co/cart-elc.git
Log | Files | Refs | README | LICENSE

Complex.h (22503B)


      1 // This file is part of Eigen, a lightweight C++ template library
      2 // for linear algebra.
      3 //
      4 // Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
      5 // Copyright (C) 2010 Konstantinos Margaritis <markos@freevec.org>
      6 //
      7 // This Source Code Form is subject to the terms of the Mozilla
      8 // Public License v. 2.0. If a copy of the MPL was not distributed
      9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
     10 
     11 #ifndef EIGEN_COMPLEX_NEON_H
     12 #define EIGEN_COMPLEX_NEON_H
     13 
     14 namespace Eigen {
     15 
     16 namespace internal {
     17 
     18 inline uint32x4_t p4ui_CONJ_XOR()
     19 {
     20 // See bug 1325, clang fails to call vld1q_u64.
     21 #if EIGEN_COMP_CLANG || EIGEN_COMP_CASTXML
     22   uint32x4_t ret = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
     23   return ret;
     24 #else
     25   static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
     26   return vld1q_u32( conj_XOR_DATA );
     27 #endif
     28 }
     29 
     30 inline uint32x2_t p2ui_CONJ_XOR()
     31 {
     32   static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000 };
     33   return vld1_u32( conj_XOR_DATA );
     34 }
     35 
     36 //---------- float ----------
     37 
     38 struct Packet1cf
     39 {
     40   EIGEN_STRONG_INLINE Packet1cf() {}
     41   EIGEN_STRONG_INLINE explicit Packet1cf(const Packet2f& a) : v(a) {}
     42   Packet2f v;
     43 };
     44 struct Packet2cf
     45 {
     46   EIGEN_STRONG_INLINE Packet2cf() {}
     47   EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
     48   Packet4f v;
     49 };
     50 
     51 template<> struct packet_traits<std::complex<float> > : default_packet_traits
     52 {
     53   typedef Packet2cf type;
     54   typedef Packet1cf half;
     55   enum
     56   {
     57     Vectorizable = 1,
     58     AlignedOnScalar = 1,
     59     size = 2,
     60     HasHalfPacket = 1,
     61 
     62     HasAdd       = 1,
     63     HasSub       = 1,
     64     HasMul       = 1,
     65     HasDiv       = 1,
     66     HasNegate    = 1,
     67     HasAbs       = 0,
     68     HasAbs2      = 0,
     69     HasMin       = 0,
     70     HasMax       = 0,
     71     HasSetLinear = 0
     72   };
     73 };
     74 
     75 template<> struct unpacket_traits<Packet1cf>
     76 {
     77   typedef std::complex<float> type;
     78   typedef Packet1cf half;
     79   typedef Packet2f as_real;
     80   enum
     81   {
     82     size = 1,
     83     alignment = Aligned16,
     84     vectorizable = true,
     85     masked_load_available = false,
     86     masked_store_available = false
     87   };
     88 };
     89 template<> struct unpacket_traits<Packet2cf>
     90 {
     91   typedef std::complex<float> type;
     92   typedef Packet1cf half;
     93   typedef Packet4f as_real;
     94   enum
     95   {
     96     size = 2,
     97     alignment = Aligned16,
     98     vectorizable = true,
     99     masked_load_available = false,
    100     masked_store_available = false
    101   };
    102 };
    103 
    104 template<> EIGEN_STRONG_INLINE Packet1cf pcast<float,Packet1cf>(const float& a)
    105 { return Packet1cf(vset_lane_f32(a, vdup_n_f32(0.f), 0)); }
    106 template<> EIGEN_STRONG_INLINE Packet2cf pcast<Packet2f,Packet2cf>(const Packet2f& a)
    107 { return Packet2cf(vreinterpretq_f32_u64(vmovl_u32(vreinterpret_u32_f32(a)))); }
    108 
    109 template<> EIGEN_STRONG_INLINE Packet1cf pset1<Packet1cf>(const std::complex<float>& from)
    110 { return Packet1cf(vld1_f32(reinterpret_cast<const float*>(&from))); }
    111 template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
    112 {
    113   const float32x2_t r64 = vld1_f32(reinterpret_cast<const float*>(&from));
    114   return Packet2cf(vcombine_f32(r64, r64));
    115 }
    116 
    117 template<> EIGEN_STRONG_INLINE Packet1cf padd<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
    118 { return Packet1cf(padd<Packet2f>(a.v, b.v)); }
    119 template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
    120 { return Packet2cf(padd<Packet4f>(a.v, b.v)); }
    121 
    122 template<> EIGEN_STRONG_INLINE Packet1cf psub<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
    123 { return Packet1cf(psub<Packet2f>(a.v, b.v)); }
    124 template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
    125 { return Packet2cf(psub<Packet4f>(a.v, b.v)); }
    126 
    127 template<> EIGEN_STRONG_INLINE Packet1cf pnegate(const Packet1cf& a) { return Packet1cf(pnegate<Packet2f>(a.v)); }
    128 template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate<Packet4f>(a.v)); }
    129 
    130 template<> EIGEN_STRONG_INLINE Packet1cf pconj(const Packet1cf& a)
    131 {
    132   const Packet2ui b = vreinterpret_u32_f32(a.v);
    133   return Packet1cf(vreinterpret_f32_u32(veor_u32(b, p2ui_CONJ_XOR())));
    134 }
    135 template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
    136 {
    137   const Packet4ui b = vreinterpretq_u32_f32(a.v);
    138   return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR())));
    139 }
    140 
    141 template<> EIGEN_STRONG_INLINE Packet1cf pmul<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
    142 {
    143   Packet2f v1, v2;
    144 
    145   // Get the real values of a | a1_re | a1_re |
    146   v1 = vdup_lane_f32(a.v, 0);
    147   // Get the imag values of a | a1_im | a1_im |
    148   v2 = vdup_lane_f32(a.v, 1);
    149   // Multiply the real a with b
    150   v1 = vmul_f32(v1, b.v);
    151   // Multiply the imag a with b
    152   v2 = vmul_f32(v2, b.v);
    153   // Conjugate v2
    154   v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR()));
    155   // Swap real/imag elements in v2.
    156   v2 = vrev64_f32(v2);
    157   // Add and return the result
    158   return Packet1cf(vadd_f32(v1, v2));
    159 }
    160 template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
    161 {
    162   Packet4f v1, v2;
    163 
    164   // Get the real values of a | a1_re | a1_re | a2_re | a2_re |
    165   v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0));
    166   // Get the imag values of a | a1_im | a1_im | a2_im | a2_im |
    167   v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 1), vdup_lane_f32(vget_high_f32(a.v), 1));
    168   // Multiply the real a with b
    169   v1 = vmulq_f32(v1, b.v);
    170   // Multiply the imag a with b
    171   v2 = vmulq_f32(v2, b.v);
    172   // Conjugate v2
    173   v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR()));
    174   // Swap real/imag elements in v2.
    175   v2 = vrev64q_f32(v2);
    176   // Add and return the result
    177   return Packet2cf(vaddq_f32(v1, v2));
    178 }
    179 
    180 template<> EIGEN_STRONG_INLINE Packet1cf pcmp_eq(const Packet1cf& a, const Packet1cf& b)
    181 {
    182   // Compare real and imaginary parts of a and b to get the mask vector:
    183   // [re(a[0])==re(b[0]), im(a[0])==im(b[0])]
    184   Packet2f eq = pcmp_eq<Packet2f>(a.v, b.v);
    185   // Swap real/imag elements in the mask in to get:
    186   // [im(a[0])==im(b[0]), re(a[0])==re(b[0])]
    187   Packet2f eq_swapped = vrev64_f32(eq);
    188   // Return re(a)==re(b) && im(a)==im(b) by computing bitwise AND of eq and eq_swapped
    189   return Packet1cf(pand<Packet2f>(eq, eq_swapped));
    190 }
    191 template<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b)
    192 {
    193   // Compare real and imaginary parts of a and b to get the mask vector:
    194   // [re(a[0])==re(b[0]), im(a[0])==im(b[0]), re(a[1])==re(b[1]), im(a[1])==im(b[1])]
    195   Packet4f eq = pcmp_eq<Packet4f>(a.v, b.v);
    196   // Swap real/imag elements in the mask in to get:
    197   // [im(a[0])==im(b[0]), re(a[0])==re(b[0]), im(a[1])==im(b[1]), re(a[1])==re(b[1])]
    198   Packet4f eq_swapped = vrev64q_f32(eq);
    199   // Return re(a)==re(b) && im(a)==im(b) by computing bitwise AND of eq and eq_swapped
    200   return Packet2cf(pand<Packet4f>(eq, eq_swapped));
    201 }
    202 
    203 template<> EIGEN_STRONG_INLINE Packet1cf pand<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
    204 { return Packet1cf(vreinterpret_f32_u32(vand_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v)))); }
    205 template<> EIGEN_STRONG_INLINE Packet2cf pand<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
    206 { return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v)))); }
    207 
    208 template<> EIGEN_STRONG_INLINE Packet1cf por<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
    209 { return Packet1cf(vreinterpret_f32_u32(vorr_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v)))); }
    210 template<> EIGEN_STRONG_INLINE Packet2cf por<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
    211 { return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v)))); }
    212 
    213 template<> EIGEN_STRONG_INLINE Packet1cf pxor<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
    214 { return Packet1cf(vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v)))); }
    215 template<> EIGEN_STRONG_INLINE Packet2cf pxor<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
    216 { return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v)))); }
    217 
    218 template<> EIGEN_STRONG_INLINE Packet1cf pandnot<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
    219 { return Packet1cf(vreinterpret_f32_u32(vbic_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v)))); }
    220 template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
    221 { return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v)))); }
    222 
    223 template<> EIGEN_STRONG_INLINE Packet1cf pload<Packet1cf>(const std::complex<float>* from)
    224 { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cf(pload<Packet2f>((const float*)from)); }
    225 template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from)
    226 { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>(reinterpret_cast<const float*>(from))); }
    227 
    228 template<> EIGEN_STRONG_INLINE Packet1cf ploadu<Packet1cf>(const std::complex<float>* from)
    229 { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cf(ploadu<Packet2f>((const float*)from)); }
    230 template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from)
    231 { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>(reinterpret_cast<const float*>(from))); }
    232 
    233 template<> EIGEN_STRONG_INLINE Packet1cf ploaddup<Packet1cf>(const std::complex<float>* from)
    234 { return pset1<Packet1cf>(*from); }
    235 template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from)
    236 { return pset1<Packet2cf>(*from); }
    237 
    238 template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> *to, const Packet1cf& from)
    239 { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
    240 template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> *to, const Packet2cf& from)
    241 { EIGEN_DEBUG_ALIGNED_STORE pstore(reinterpret_cast<float*>(to), from.v); }
    242 
    243 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> *to, const Packet1cf& from)
    244 { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
    245 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> *to, const Packet2cf& from)
    246 { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<float*>(to), from.v); }
    247 
    248 template<> EIGEN_DEVICE_FUNC inline Packet1cf pgather<std::complex<float>, Packet1cf>(
    249     const std::complex<float>* from, Index stride)
    250 {
    251   const Packet2f tmp = vdup_n_f32(std::real(from[0*stride]));
    252   return Packet1cf(vset_lane_f32(std::imag(from[0*stride]), tmp, 1));
    253 }
    254 template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(
    255     const std::complex<float>* from, Index stride)
    256 {
    257   Packet4f res = vdupq_n_f32(std::real(from[0*stride]));
    258   res = vsetq_lane_f32(std::imag(from[0*stride]), res, 1);
    259   res = vsetq_lane_f32(std::real(from[1*stride]), res, 2);
    260   res = vsetq_lane_f32(std::imag(from[1*stride]), res, 3);
    261   return Packet2cf(res);
    262 }
    263 
    264 template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet1cf>(
    265     std::complex<float>* to, const Packet1cf& from, Index stride)
    266 { to[stride*0] = std::complex<float>(vget_lane_f32(from.v, 0), vget_lane_f32(from.v, 1)); }
    267 template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(
    268     std::complex<float>* to, const Packet2cf& from, Index stride)
    269 {
    270   to[stride*0] = std::complex<float>(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1));
    271   to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
    272 }
    273 
    274 template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> *addr)
    275 { EIGEN_ARM_PREFETCH(reinterpret_cast<const float*>(addr)); }
    276 
    277 template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet1cf>(const Packet1cf& a)
    278 {
    279   EIGEN_ALIGN16 std::complex<float> x;
    280   vst1_f32(reinterpret_cast<float*>(&x), a.v);
    281   return x;
    282 }
    283 template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
    284 {
    285   EIGEN_ALIGN16 std::complex<float> x[2];
    286   vst1q_f32(reinterpret_cast<float*>(x), a.v);
    287   return x[0];
    288 }
    289 
    290 template<> EIGEN_STRONG_INLINE Packet1cf preverse(const Packet1cf& a) { return a; }
    291 template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
    292 { return Packet2cf(vcombine_f32(vget_high_f32(a.v), vget_low_f32(a.v))); }
    293 
    294 template<> EIGEN_STRONG_INLINE Packet1cf pcplxflip<Packet1cf>(const Packet1cf& a)
    295 { return Packet1cf(vrev64_f32(a.v)); }
    296 template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& a)
    297 { return Packet2cf(vrev64q_f32(a.v)); }
    298 
    299 template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet1cf>(const Packet1cf& a)
    300 {
    301   std::complex<float> s;
    302   vst1_f32((float *)&s, a.v);
    303   return s;
    304 }
    305 template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
    306 {
    307   std::complex<float> s;
    308   vst1_f32(reinterpret_cast<float*>(&s), vadd_f32(vget_low_f32(a.v), vget_high_f32(a.v)));
    309   return s;
    310 }
    311 
    312 template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet1cf>(const Packet1cf& a)
    313 {
    314   std::complex<float> s;
    315   vst1_f32((float *)&s, a.v);
    316   return s;
    317 }
    318 template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
    319 {
    320   float32x2_t a1, a2, v1, v2, prod;
    321   std::complex<float> s;
    322 
    323   a1 = vget_low_f32(a.v);
    324   a2 = vget_high_f32(a.v);
    325    // Get the real values of a | a1_re | a1_re | a2_re | a2_re |
    326   v1 = vdup_lane_f32(a1, 0);
    327   // Get the real values of a | a1_im | a1_im | a2_im | a2_im |
    328   v2 = vdup_lane_f32(a1, 1);
    329   // Multiply the real a with b
    330   v1 = vmul_f32(v1, a2);
    331   // Multiply the imag a with b
    332   v2 = vmul_f32(v2, a2);
    333   // Conjugate v2
    334   v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR()));
    335   // Swap real/imag elements in v2.
    336   v2 = vrev64_f32(v2);
    337   // Add v1, v2
    338   prod = vadd_f32(v1, v2);
    339 
    340   vst1_f32(reinterpret_cast<float*>(&s), prod);
    341 
    342   return s;
    343 }
    344 
    345 EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cf,Packet2f)
    346 EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
    347 
    348 template<> EIGEN_STRONG_INLINE Packet1cf pdiv<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
    349 {
    350   // TODO optimize it for NEON
    351   Packet1cf res = pmul(a, pconj(b));
    352   Packet2f s, rev_s;
    353 
    354   // this computes the norm
    355   s = vmul_f32(b.v, b.v);
    356   rev_s = vrev64_f32(s);
    357 
    358   return Packet1cf(pdiv<Packet2f>(res.v, vadd_f32(s, rev_s)));
    359 }
    360 template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
    361 {
    362   // TODO optimize it for NEON
    363   Packet2cf res = pmul(a,pconj(b));
    364   Packet4f s, rev_s;
    365 
    366   // this computes the norm
    367   s = vmulq_f32(b.v, b.v);
    368   rev_s = vrev64q_f32(s);
    369 
    370   return Packet2cf(pdiv<Packet4f>(res.v, vaddq_f32(s, rev_s)));
    371 }
    372 
    373 EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet1cf, 1>& /*kernel*/) {}
    374 EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2cf, 2>& kernel)
    375 {
    376   Packet4f tmp = vcombine_f32(vget_high_f32(kernel.packet[0].v), vget_high_f32(kernel.packet[1].v));
    377   kernel.packet[0].v = vcombine_f32(vget_low_f32(kernel.packet[0].v), vget_low_f32(kernel.packet[1].v));
    378   kernel.packet[1].v = tmp;
    379 }
    380 
    381 template<> EIGEN_STRONG_INLINE Packet1cf psqrt<Packet1cf>(const Packet1cf& a) {
    382   return psqrt_complex<Packet1cf>(a);
    383 }
    384 
    385 template<> EIGEN_STRONG_INLINE Packet2cf psqrt<Packet2cf>(const Packet2cf& a) {
    386   return psqrt_complex<Packet2cf>(a);
    387 }
    388 
    389 //---------- double ----------
    390 #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
    391 
    392 // See bug 1325, clang fails to call vld1q_u64.
    393 #if EIGEN_COMP_CLANG || EIGEN_COMP_CASTXML
    394   static uint64x2_t p2ul_CONJ_XOR = {0x0, 0x8000000000000000};
    395 #else
    396   const uint64_t  p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 };
    397   static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );
    398 #endif
    399 
    400 struct Packet1cd
    401 {
    402   EIGEN_STRONG_INLINE Packet1cd() {}
    403   EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
    404   Packet2d v;
    405 };
    406 
    407 template<> struct packet_traits<std::complex<double> >  : default_packet_traits
    408 {
    409   typedef Packet1cd type;
    410   typedef Packet1cd half;
    411   enum
    412   {
    413     Vectorizable = 1,
    414     AlignedOnScalar = 0,
    415     size = 1,
    416     HasHalfPacket = 0,
    417 
    418     HasAdd    = 1,
    419     HasSub    = 1,
    420     HasMul    = 1,
    421     HasDiv    = 1,
    422     HasNegate = 1,
    423     HasAbs    = 0,
    424     HasAbs2   = 0,
    425     HasMin    = 0,
    426     HasMax    = 0,
    427     HasSetLinear = 0
    428   };
    429 };
    430 
    431 template<> struct unpacket_traits<Packet1cd>
    432 {
    433   typedef std::complex<double> type;
    434   typedef Packet1cd half;
    435   typedef Packet2d as_real;
    436   enum
    437   {
    438     size=1,
    439     alignment=Aligned16,
    440     vectorizable=true,
    441     masked_load_available=false,
    442     masked_store_available=false
    443   };
    444 };
    445 
    446 template<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from)
    447 { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>(reinterpret_cast<const double*>(from))); }
    448 
    449 template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from)
    450 { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>(reinterpret_cast<const double*>(from))); }
    451 
    452 template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
    453 {
    454   /* here we really have to use unaligned loads :( */
    455   return ploadu<Packet1cd>(&from);
    456 }
    457 
    458 template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
    459 { return Packet1cd(padd<Packet2d>(a.v, b.v)); }
    460 
    461 template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
    462 { return Packet1cd(psub<Packet2d>(a.v, b.v)); }
    463 
    464 template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a)
    465 { return Packet1cd(pnegate<Packet2d>(a.v)); }
    466 
    467 template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a)
    468 { return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR))); }
    469 
    470 template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
    471 {
    472   Packet2d v1, v2;
    473 
    474   // Get the real values of a
    475   v1 = vdupq_lane_f64(vget_low_f64(a.v), 0);
    476   // Get the imag values of a
    477   v2 = vdupq_lane_f64(vget_high_f64(a.v), 0);
    478   // Multiply the real a with b
    479   v1 = vmulq_f64(v1, b.v);
    480   // Multiply the imag a with b
    481   v2 = vmulq_f64(v2, b.v);
    482   // Conjugate v2
    483   v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR));
    484   // Swap real/imag elements in v2.
    485   v2 = preverse<Packet2d>(v2);
    486   // Add and return the result
    487   return Packet1cd(vaddq_f64(v1, v2));
    488 }
    489 
    490 template<> EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b)
    491 {
    492   // Compare real and imaginary parts of a and b to get the mask vector:
    493   // [re(a)==re(b), im(a)==im(b)]
    494   Packet2d eq = pcmp_eq<Packet2d>(a.v, b.v);
    495   // Swap real/imag elements in the mask in to get:
    496   // [im(a)==im(b), re(a)==re(b)]
    497   Packet2d eq_swapped = vreinterpretq_f64_u32(vrev64q_u32(vreinterpretq_u32_f64(eq)));
    498   // Return re(a)==re(b) & im(a)==im(b) by computing bitwise AND of eq and eq_swapped
    499   return Packet1cd(pand<Packet2d>(eq, eq_swapped));
    500 }
    501 
    502 template<> EIGEN_STRONG_INLINE Packet1cd pand<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
    503 { return Packet1cd(vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); }
    504 
    505 template<> EIGEN_STRONG_INLINE Packet1cd por<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
    506 { return Packet1cd(vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); }
    507 
    508 template<> EIGEN_STRONG_INLINE Packet1cd pxor<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
    509 { return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); }
    510 
    511 template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
    512 { return Packet1cd(vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); }
    513 
    514 template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from)
    515 { return pset1<Packet1cd>(*from); }
    516 
    517 template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> *to, const Packet1cd& from)
    518 { EIGEN_DEBUG_ALIGNED_STORE pstore(reinterpret_cast<double*>(to), from.v); }
    519 
    520 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> *to, const Packet1cd& from)
    521 { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), from.v); }
    522 
    523 template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> *addr)
    524 { EIGEN_ARM_PREFETCH(reinterpret_cast<const double*>(addr)); }
    525 
    526 template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(
    527     const std::complex<double>* from, Index stride)
    528 {
    529   Packet2d res = pset1<Packet2d>(0.0);
    530   res = vsetq_lane_f64(std::real(from[0*stride]), res, 0);
    531   res = vsetq_lane_f64(std::imag(from[0*stride]), res, 1);
    532   return Packet1cd(res);
    533 }
    534 
    535 template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(
    536     std::complex<double>* to, const Packet1cd& from, Index stride)
    537 { to[stride*0] = std::complex<double>(vgetq_lane_f64(from.v, 0), vgetq_lane_f64(from.v, 1)); }
    538 
    539 template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
    540 {
    541   EIGEN_ALIGN16 std::complex<double> res;
    542   pstore<std::complex<double> >(&res, a);
    543   return res;
    544 }
    545 
    546 template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
    547 
    548 template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
    549 
    550 template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
    551 
    552 EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
    553 
    554 template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
    555 {
    556   // TODO optimize it for NEON
    557   Packet1cd res = pmul(a,pconj(b));
    558   Packet2d s = pmul<Packet2d>(b.v, b.v);
    559   Packet2d rev_s = preverse<Packet2d>(s);
    560 
    561   return Packet1cd(pdiv(res.v, padd<Packet2d>(s,rev_s)));
    562 }
    563 
    564 EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
    565 { return Packet1cd(preverse(Packet2d(x.v))); }
    566 
    567 EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
    568 {
    569   Packet2d tmp = vcombine_f64(vget_high_f64(kernel.packet[0].v), vget_high_f64(kernel.packet[1].v));
    570   kernel.packet[0].v = vcombine_f64(vget_low_f64(kernel.packet[0].v), vget_low_f64(kernel.packet[1].v));
    571   kernel.packet[1].v = tmp;
    572 }
    573 
    574 template<> EIGEN_STRONG_INLINE Packet1cd psqrt<Packet1cd>(const Packet1cd& a) {
    575   return psqrt_complex<Packet1cd>(a);
    576 }
    577 
    578 #endif // EIGEN_ARCH_ARM64
    579 
    580 } // end namespace internal
    581 
    582 } // end namespace Eigen
    583 
    584 #endif // EIGEN_COMPLEX_NEON_H