cart-elc

Source code for CART-ELC
git clone git://git.laack.co/cart-elc.git
Log | Files | Refs | README | LICENSE

ConfigureVectorization.h (19876B)


      1 // This file is part of Eigen, a lightweight C++ template library
      2 // for linear algebra.
      3 //
      4 // Copyright (C) 2008-2018 Gael Guennebaud <gael.guennebaud@inria.fr>
      5 // Copyright (C) 2020, Arm Limited and Contributors
      6 //
      7 // This Source Code Form is subject to the terms of the Mozilla
      8 // Public License v. 2.0. If a copy of the MPL was not distributed
      9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
     10 
     11 #ifndef EIGEN_CONFIGURE_VECTORIZATION_H
     12 #define EIGEN_CONFIGURE_VECTORIZATION_H
     13 
     14 //------------------------------------------------------------------------------------------
     15 // Static and dynamic alignment control
     16 //
     17 // The main purpose of this section is to define EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES
     18 // as the maximal boundary in bytes on which dynamically and statically allocated data may be aligned, respectively.
     19 // The values of EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES can be specified by the user. If not,
     20 // a default value is automatically computed based on architecture, compiler, and OS.
     21 //
     22 // This section also defines macros EIGEN_ALIGN_TO_BOUNDARY(N) and the shortcuts EIGEN_ALIGN{8,16,32,_MAX}
     23 // to be used to declare statically aligned buffers.
     24 //------------------------------------------------------------------------------------------
     25 
     26 
     27 /* EIGEN_ALIGN_TO_BOUNDARY(n) forces data to be n-byte aligned. This is used to satisfy SIMD requirements.
     28  * However, we do that EVEN if vectorization (EIGEN_VECTORIZE) is disabled,
     29  * so that vectorization doesn't affect binary compatibility.
     30  *
     31  * If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link
     32  * vectorized and non-vectorized code.
     33  * 
     34  * FIXME: this code can be cleaned up once we switch to proper C++11 only.
     35  */
     36 #if (defined EIGEN_CUDACC)
     37   #define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n)
     38   #define EIGEN_ALIGNOF(x) __alignof(x)
     39 #elif EIGEN_HAS_ALIGNAS
     40   #define EIGEN_ALIGN_TO_BOUNDARY(n) alignas(n)
     41   #define EIGEN_ALIGNOF(x) alignof(x)
     42 #elif EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM
     43   #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
     44   #define EIGEN_ALIGNOF(x) __alignof(x)
     45 #elif EIGEN_COMP_MSVC
     46   #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n))
     47   #define EIGEN_ALIGNOF(x) __alignof(x)
     48 #elif EIGEN_COMP_SUNCC
     49   // FIXME not sure about this one:
     50   #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
     51   #define EIGEN_ALIGNOF(x) __alignof(x)
     52 #else
     53   #error Please tell me what is the equivalent of alignas(n) and alignof(x) for your compiler
     54 #endif
     55 
     56 // If the user explicitly disables vectorization, then we also disable alignment
     57 #if defined(EIGEN_DONT_VECTORIZE)
     58   #if defined(EIGEN_GPUCC)
     59     // GPU code is always vectorized and requires memory alignment for
     60     // statically allocated buffers.
     61     #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
     62   #else
     63     #define EIGEN_IDEAL_MAX_ALIGN_BYTES 0
     64   #endif
     65 #elif defined(__AVX512F__)
     66   // 64 bytes static alignment is preferred only if really required
     67   #define EIGEN_IDEAL_MAX_ALIGN_BYTES 64
     68 #elif defined(__AVX__)
     69   // 32 bytes static alignment is preferred only if really required
     70   #define EIGEN_IDEAL_MAX_ALIGN_BYTES 32
     71 #else
     72   #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
     73 #endif
     74 
     75 
     76 // EIGEN_MIN_ALIGN_BYTES defines the minimal value for which the notion of explicit alignment makes sense
     77 #define EIGEN_MIN_ALIGN_BYTES 16
     78 
     79 // Defines the boundary (in bytes) on which the data needs to be aligned. Note
     80 // that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be
     81 // aligned at all regardless of the value of this #define.
     82 
     83 #if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN))  && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && EIGEN_MAX_STATIC_ALIGN_BYTES>0
     84 #error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY.
     85 #endif
     86 
     87 // EIGEN_DONT_ALIGN_STATICALLY and EIGEN_DONT_ALIGN are deprecated
     88 // They imply EIGEN_MAX_STATIC_ALIGN_BYTES=0
     89 #if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)
     90   #ifdef EIGEN_MAX_STATIC_ALIGN_BYTES
     91     #undef EIGEN_MAX_STATIC_ALIGN_BYTES
     92   #endif
     93   #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
     94 #endif
     95 
     96 #ifndef EIGEN_MAX_STATIC_ALIGN_BYTES
     97 
     98   // Try to automatically guess what is the best default value for EIGEN_MAX_STATIC_ALIGN_BYTES
     99 
    100   // 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable
    101   // 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always
    102   // enable alignment, but it can be a cause of problems on some platforms, so we just disable it in
    103   // certain common platforms (compiler+architecture combinations) to avoid these problems.
    104   // Only static alignment is really problematic (relies on nonstandard compiler extensions),
    105   // try to keep heap alignment even when we have to disable static alignment.
    106   #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64 || EIGEN_ARCH_MIPS)
    107   #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
    108   #elif EIGEN_ARCH_ARM_OR_ARM64 && EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_MOST(4, 6)
    109   // Old versions of GCC on ARM, at least 4.4, were once seen to have buggy static alignment support.
    110   // Not sure which version fixed it, hopefully it doesn't affect 4.7, which is still somewhat in use.
    111   // 4.8 and newer seem definitely unaffected.
    112   #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
    113   #else
    114   #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
    115   #endif
    116 
    117   // static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX
    118   #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \
    119   && !EIGEN_GCC3_OR_OLDER \
    120   && !EIGEN_COMP_SUNCC \
    121   && !EIGEN_OS_QNX
    122     #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
    123   #else
    124     #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
    125   #endif
    126 
    127   #if EIGEN_ARCH_WANTS_STACK_ALIGNMENT
    128     #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
    129   #else
    130     #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
    131   #endif
    132 
    133 #endif
    134 
    135 // If EIGEN_MAX_ALIGN_BYTES is defined, then it is considered as an upper bound for EIGEN_MAX_STATIC_ALIGN_BYTES
    136 #if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES<EIGEN_MAX_STATIC_ALIGN_BYTES
    137 #undef EIGEN_MAX_STATIC_ALIGN_BYTES
    138 #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
    139 #endif
    140 
    141 #if EIGEN_MAX_STATIC_ALIGN_BYTES==0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
    142   #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
    143 #endif
    144 
    145 // At this stage, EIGEN_MAX_STATIC_ALIGN_BYTES>0 is the true test whether we want to align arrays on the stack or not.
    146 // It takes into account both the user choice to explicitly enable/disable alignment (by setting EIGEN_MAX_STATIC_ALIGN_BYTES)
    147 // and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT).
    148 // Henceforth, only EIGEN_MAX_STATIC_ALIGN_BYTES should be used.
    149 
    150 
    151 // Shortcuts to EIGEN_ALIGN_TO_BOUNDARY
    152 #define EIGEN_ALIGN8  EIGEN_ALIGN_TO_BOUNDARY(8)
    153 #define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
    154 #define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)
    155 #define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64)
    156 #if EIGEN_MAX_STATIC_ALIGN_BYTES>0
    157 #define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES)
    158 #else
    159 #define EIGEN_ALIGN_MAX
    160 #endif
    161 
    162 
    163 // Dynamic alignment control
    164 
    165 #if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES>0
    166 #error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN.
    167 #endif
    168 
    169 #ifdef EIGEN_DONT_ALIGN
    170   #ifdef EIGEN_MAX_ALIGN_BYTES
    171     #undef EIGEN_MAX_ALIGN_BYTES
    172   #endif
    173   #define EIGEN_MAX_ALIGN_BYTES 0
    174 #elif !defined(EIGEN_MAX_ALIGN_BYTES)
    175   #define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
    176 #endif
    177 
    178 #if EIGEN_IDEAL_MAX_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES
    179 #define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
    180 #else
    181 #define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
    182 #endif
    183 
    184 
    185 #ifndef EIGEN_UNALIGNED_VECTORIZE
    186 #define EIGEN_UNALIGNED_VECTORIZE 1
    187 #endif
    188 
    189 //----------------------------------------------------------------------
    190 
    191 // if alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into
    192 // account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks
    193 #if EIGEN_MAX_ALIGN_BYTES==0
    194   #ifndef EIGEN_DONT_VECTORIZE
    195     #define EIGEN_DONT_VECTORIZE
    196   #endif
    197 #endif
    198 
    199 
    200 // The following (except #include <malloc.h> and _M_IX86_FP ??) can likely be
    201 // removed as gcc 4.1 and msvc 2008 are not supported anyways.
    202 #if EIGEN_COMP_MSVC
    203   #include <malloc.h> // for _aligned_malloc -- need it regardless of whether vectorization is enabled
    204   #if (EIGEN_COMP_MSVC >= 1500) // 2008 or later
    205     // a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP.
    206     #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64
    207       #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
    208     #endif
    209   #endif
    210 #else
    211   #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) )
    212     #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC
    213   #endif
    214 #endif
    215 
    216 #if !(defined(EIGEN_DONT_VECTORIZE) || defined(EIGEN_GPUCC))
    217 
    218   #if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
    219 
    220     // Defines symbols for compile-time detection of which instructions are
    221     // used.
    222     // EIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used
    223     #define EIGEN_VECTORIZE
    224     #define EIGEN_VECTORIZE_SSE
    225     #define EIGEN_VECTORIZE_SSE2
    226 
    227     // Detect sse3/ssse3/sse4:
    228     // gcc and icc defines __SSE3__, ...
    229     // there is no way to know about this on msvc. You can define EIGEN_VECTORIZE_SSE* if you
    230     // want to force the use of those instructions with msvc.
    231     #ifdef __SSE3__
    232       #define EIGEN_VECTORIZE_SSE3
    233     #endif
    234     #ifdef __SSSE3__
    235       #define EIGEN_VECTORIZE_SSSE3
    236     #endif
    237     #ifdef __SSE4_1__
    238       #define EIGEN_VECTORIZE_SSE4_1
    239     #endif
    240     #ifdef __SSE4_2__
    241       #define EIGEN_VECTORIZE_SSE4_2
    242     #endif
    243     #ifdef __AVX__
    244       #ifndef EIGEN_USE_SYCL 
    245         #define EIGEN_VECTORIZE_AVX
    246       #endif
    247       #define EIGEN_VECTORIZE_SSE3
    248       #define EIGEN_VECTORIZE_SSSE3
    249       #define EIGEN_VECTORIZE_SSE4_1
    250       #define EIGEN_VECTORIZE_SSE4_2
    251     #endif
    252     #ifdef __AVX2__
    253       #ifndef EIGEN_USE_SYCL 
    254         #define EIGEN_VECTORIZE_AVX2
    255         #define EIGEN_VECTORIZE_AVX
    256       #endif
    257       #define EIGEN_VECTORIZE_SSE3
    258       #define EIGEN_VECTORIZE_SSSE3
    259       #define EIGEN_VECTORIZE_SSE4_1
    260       #define EIGEN_VECTORIZE_SSE4_2
    261     #endif
    262     #if defined(__FMA__) || (EIGEN_COMP_MSVC && defined(__AVX2__))
    263       // MSVC does not expose a switch dedicated for FMA
    264       // For MSVC, AVX2 => FMA
    265       #define EIGEN_VECTORIZE_FMA
    266     #endif
    267     #if defined(__AVX512F__)
    268       #ifndef EIGEN_VECTORIZE_FMA
    269       #if EIGEN_COMP_GNUC
    270       #error Please add -mfma to your compiler flags: compiling with -mavx512f alone without SSE/AVX FMA is not supported (bug 1638).
    271       #else
    272       #error Please enable FMA in your compiler flags (e.g. -mfma): compiling with AVX512 alone without SSE/AVX FMA is not supported (bug 1638).
    273       #endif
    274       #endif
    275       #ifndef EIGEN_USE_SYCL
    276         #define EIGEN_VECTORIZE_AVX512
    277         #define EIGEN_VECTORIZE_AVX2
    278         #define EIGEN_VECTORIZE_AVX
    279       #endif
    280       #define EIGEN_VECTORIZE_FMA
    281       #define EIGEN_VECTORIZE_SSE3
    282       #define EIGEN_VECTORIZE_SSSE3
    283       #define EIGEN_VECTORIZE_SSE4_1
    284       #define EIGEN_VECTORIZE_SSE4_2
    285       #ifndef EIGEN_USE_SYCL
    286         #ifdef __AVX512DQ__
    287           #define EIGEN_VECTORIZE_AVX512DQ
    288         #endif
    289         #ifdef __AVX512ER__
    290           #define EIGEN_VECTORIZE_AVX512ER
    291         #endif
    292         #ifdef __AVX512BF16__
    293           #define EIGEN_VECTORIZE_AVX512BF16
    294         #endif
    295       #endif
    296     #endif
    297 
    // Disable AVX support on broken Xcode versions.
    // The condition matches Apple clang build 11000033 combined with a minimum
    // deployment target of macOS 10.15 (__MAC_OS_X_VERSION_MIN_REQUIRED == 101500).
    #if defined(__apple_build_version__) && (__apple_build_version__ == 11000033 ) && ( __MAC_OS_X_VERSION_MIN_REQUIRED == 101500 )
      // The clang compiler shipped with Xcode 11.0 hits a nasty bug in a common compilation
      // situation: Mac deployment target macOS 10.15 with AVX enabled.
      // See https://trac.macports.org/ticket/58776#no1
      #ifdef EIGEN_VECTORIZE_AVX
        #undef EIGEN_VECTORIZE_AVX
        #warning "Disabling AVX support: clang compiler shipped with XCode 11.[012] generates broken assembly with -macosx-version-min=10.15 and AVX enabled. "
        // The wider AVX-family macros (AVX2, FMA, AVX512 and its sub-extensions)
        // are withdrawn together with AVX.
        #ifdef EIGEN_VECTORIZE_AVX2
          #undef EIGEN_VECTORIZE_AVX2
        #endif
        #ifdef EIGEN_VECTORIZE_FMA
          #undef EIGEN_VECTORIZE_FMA
        #endif
        #ifdef EIGEN_VECTORIZE_AVX512
          #undef EIGEN_VECTORIZE_AVX512
        #endif
        #ifdef EIGEN_VECTORIZE_AVX512DQ
          #undef EIGEN_VECTORIZE_AVX512DQ
        #endif
        #ifdef EIGEN_VECTORIZE_AVX512ER
          #undef EIGEN_VECTORIZE_AVX512ER
        #endif
        // AVX512BF16 is enabled under the same AVX512 gate as DQ/ER, so it must
        // not outlive EIGEN_VECTORIZE_AVX512 either.
        #ifdef EIGEN_VECTORIZE_AVX512BF16
          #undef EIGEN_VECTORIZE_AVX512BF16
        #endif
      #endif
      // NOTE: Confirmed test failures in XCode 11.0, and XCode 11.2 with  -macosx-version-min=10.15 and AVX
      // NOTE using -macosx-version-min=10.15 with Xcode 11.0 results in runtime segmentation faults in many tests, 11.2 produce core dumps in 3 tests
      // NOTE using -macosx-version-min=10.14 produces functioning and passing tests in all cases
      // NOTE __clang_version__ "11.0.0 (clang-1100.0.33.8)"  XCode 11.0 <- Produces many segfault and core dumping tests
      //                                                                    with  -macosx-version-min=10.15 and AVX
      // NOTE __clang_version__ "11.0.0 (clang-1100.0.33.12)" XCode 11.2 <- Produces 3 core dumping tests with
      //                                                                    -macosx-version-min=10.15 and AVX
    #endif
    329 
    330     // include files
    331 
    332     // This extern "C" works around a MINGW-w64 compilation issue
    333     // https://sourceforge.net/tracker/index.php?func=detail&aid=3018394&group_id=202880&atid=983354
    334     // In essence, intrin.h is included by windows.h and also declares intrinsics (just as emmintrin.h etc. below do).
    335     // However, intrin.h uses an extern "C" declaration, and g++ thus complains of duplicate declarations
    336     // with conflicting linkage.  The linkage for intrinsics doesn't matter, but at that stage the compiler doesn't know;
    337     // so, to avoid compile errors when windows.h is included after Eigen/Core, ensure intrinsics are extern "C" here too.
    338     // notice that since these are C headers, the extern "C" is theoretically needed anyways.
    339     extern "C" {
    340       // In theory we should only include immintrin.h and not the other *mmintrin.h header files directly.
    341       // Doing so triggers some issues with ICC. However, old gcc versions seem not to have this file, thus:
    342       #if EIGEN_COMP_ICC >= 1110
    343         #include <immintrin.h>
    344       #else
    345         #include <mmintrin.h>
    346         #include <emmintrin.h>
    347         #include <xmmintrin.h>
    348         #ifdef  EIGEN_VECTORIZE_SSE3
    349         #include <pmmintrin.h>
    350         #endif
    351         #ifdef EIGEN_VECTORIZE_SSSE3
    352         #include <tmmintrin.h>
    353         #endif
    354         #ifdef EIGEN_VECTORIZE_SSE4_1
    355         #include <smmintrin.h>
    356         #endif
    357         #ifdef EIGEN_VECTORIZE_SSE4_2
    358         #include <nmmintrin.h>
    359         #endif
    360         #if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512)
    361         #include <immintrin.h>
    362         #endif
    363       #endif
    364     } // end extern "C"
    365 
    366   #elif defined __VSX__
    367 
    368     #define EIGEN_VECTORIZE
    369     #define EIGEN_VECTORIZE_VSX
    370     #include <altivec.h>
    371     // We need to #undef all these ugly tokens defined in <altivec.h>
    372     // => use __vector instead of vector
    373     #undef bool
    374     #undef vector
    375     #undef pixel
    376 
    377   #elif defined __ALTIVEC__
    378 
    379     #define EIGEN_VECTORIZE
    380     #define EIGEN_VECTORIZE_ALTIVEC
    381     #include <altivec.h>
    382     // We need to #undef all these ugly tokens defined in <altivec.h>
    383     // => use __vector instead of vector
    384     #undef bool
    385     #undef vector
    386     #undef pixel
    387 
    388   #elif ((defined  __ARM_NEON) || (defined __ARM_NEON__)) && !(defined EIGEN_ARM64_USE_SVE)
    389 
    390     #define EIGEN_VECTORIZE
    391     #define EIGEN_VECTORIZE_NEON
    392     #include <arm_neon.h>
    393 
    394   // We currently require SVE to be enabled explicitly via EIGEN_ARM64_USE_SVE and
    395   // will not select the backend automatically
    396   #elif (defined __ARM_FEATURE_SVE) && (defined EIGEN_ARM64_USE_SVE)
    397 
    398     #define EIGEN_VECTORIZE
    399     #define EIGEN_VECTORIZE_SVE
    400     #include <arm_sve.h>
    401 
    402     // Since we depend on knowing SVE vector lengths at compile-time, we need
    403     // to ensure a fixed length is set
    404     #if defined __ARM_FEATURE_SVE_BITS
    405       #define EIGEN_ARM64_SVE_VL __ARM_FEATURE_SVE_BITS
    406     #else
    407 #error "Eigen requires a fixed SVE lector length but EIGEN_ARM64_SVE_VL is not set."
    408 #endif
    409 
    410 #elif (defined __s390x__ && defined __VEC__)
    411 
    412 #define EIGEN_VECTORIZE
    413 #define EIGEN_VECTORIZE_ZVECTOR
    414 #include <vecintrin.h>
    415 
    416 #elif defined __mips_msa
    417 
    418 // Limit MSA optimizations to little-endian CPUs for now.
    419 // TODO: Perhaps, eventually support MSA optimizations on big-endian CPUs?
    420 #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
    421 #if defined(__LP64__)
    422 #define EIGEN_MIPS_64
    423 #else
    424 #define EIGEN_MIPS_32
    425 #endif
    426 #define EIGEN_VECTORIZE
    427 #define EIGEN_VECTORIZE_MSA
    428 #include <msa.h>
    429 #endif
    430 
    431 #endif
    432 #endif
    433 
    434 // Following the Arm ACLE arm_neon.h should also include arm_fp16.h but not all
    435 // compilers seem to follow this. We therefore include it explicitly.
    436 // See also: https://bugs.llvm.org/show_bug.cgi?id=47955
    437 #if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
    438   #include <arm_fp16.h>
    439 #endif
    440 
    441 #if defined(__F16C__) && (!defined(EIGEN_GPUCC) && (!defined(EIGEN_COMP_CLANG) || EIGEN_COMP_CLANG>=380))
    442   // We can use the optimized fp16 to float and float to fp16 conversion routines
    443   #define EIGEN_HAS_FP16_C
    444 
    445   #if defined(EIGEN_COMP_CLANG)
    446     // Workaround for clang: The FP16C intrinsics for clang are included by
    447     // immintrin.h, as opposed to emmintrin.h as suggested by Intel:
    448     // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#othertechs=FP16C&expand=1711
    449     #include <immintrin.h>
    450   #endif
    451 #endif
    452 
    453 #if defined EIGEN_CUDACC
    454   #define EIGEN_VECTORIZE_GPU
    455   #include <vector_types.h>
    456   #if EIGEN_CUDA_SDK_VER >= 70500
    457     #define EIGEN_HAS_CUDA_FP16
    458   #endif
    459 #endif
    460 
    461 #if defined(EIGEN_HAS_CUDA_FP16)
    462   #include <cuda_runtime_api.h>
    463   #include <cuda_fp16.h>
    464 #endif
    465 
    466 #if defined(EIGEN_HIPCC)
    467   #define EIGEN_VECTORIZE_GPU
    468   #include <hip/hip_vector_types.h>
    469   #define EIGEN_HAS_HIP_FP16
    470   #include <hip/hip_fp16.h>
    471 #endif
    472 
    473 
    474 /** \brief Namespace containing all symbols from the %Eigen library. */
    475 namespace Eigen {
    476 
    477 inline static const char *SimdInstructionSetsInUse(void) {
    478 #if defined(EIGEN_VECTORIZE_AVX512)
    479   return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
    480 #elif defined(EIGEN_VECTORIZE_AVX)
    481   return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
    482 #elif defined(EIGEN_VECTORIZE_SSE4_2)
    483   return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
    484 #elif defined(EIGEN_VECTORIZE_SSE4_1)
    485   return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
    486 #elif defined(EIGEN_VECTORIZE_SSSE3)
    487   return "SSE, SSE2, SSE3, SSSE3";
    488 #elif defined(EIGEN_VECTORIZE_SSE3)
    489   return "SSE, SSE2, SSE3";
    490 #elif defined(EIGEN_VECTORIZE_SSE2)
    491   return "SSE, SSE2";
    492 #elif defined(EIGEN_VECTORIZE_ALTIVEC)
    493   return "AltiVec";
    494 #elif defined(EIGEN_VECTORIZE_VSX)
    495   return "VSX";
    496 #elif defined(EIGEN_VECTORIZE_NEON)
    497   return "ARM NEON";
    498 #elif defined(EIGEN_VECTORIZE_SVE)
    499   return "ARM SVE";
    500 #elif defined(EIGEN_VECTORIZE_ZVECTOR)
    501   return "S390X ZVECTOR";
    502 #elif defined(EIGEN_VECTORIZE_MSA)
    503   return "MIPS MSA";
    504 #else
    505   return "None";
    506 #endif
    507 }
    508 
    509 } // end namespace Eigen
    510 
    511 
    512 #endif // EIGEN_CONFIGURE_VECTORIZATION_H