ConfigureVectorization.h (19876B)
1 // This file is part of Eigen, a lightweight C++ template library 2 // for linear algebra. 3 // 4 // Copyright (C) 2008-2018 Gael Guennebaud <gael.guennebaud@inria.fr> 5 // Copyright (C) 2020, Arm Limited and Contributors 6 // 7 // This Source Code Form is subject to the terms of the Mozilla 8 // Public License v. 2.0. If a copy of the MPL was not distributed 9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 10 11 #ifndef EIGEN_CONFIGURE_VECTORIZATION_H 12 #define EIGEN_CONFIGURE_VECTORIZATION_H 13 14 //------------------------------------------------------------------------------------------ 15 // Static and dynamic alignment control 16 // 17 // The main purpose of this section is to define EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES 18 // as the maximal boundary in bytes on which dynamically and statically allocated data may be alignment respectively. 19 // The values of EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES can be specified by the user. If not, 20 // a default value is automatically computed based on architecture, compiler, and OS. 21 // 22 // This section also defines macros EIGEN_ALIGN_TO_BOUNDARY(N) and the shortcuts EIGEN_ALIGN{8,16,32,_MAX} 23 // to be used to declare statically aligned buffers. 24 //------------------------------------------------------------------------------------------ 25 26 27 /* EIGEN_ALIGN_TO_BOUNDARY(n) forces data to be n-byte aligned. This is used to satisfy SIMD requirements. 28 * However, we do that EVEN if vectorization (EIGEN_VECTORIZE) is disabled, 29 * so that vectorization doesn't affect binary compatibility. 30 * 31 * If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link 32 * vectorized and non-vectorized code. 33 * 34 * FIXME: this code can be cleaned up once we switch to proper C++11 only. 
35 */ 36 #if (defined EIGEN_CUDACC) 37 #define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n) 38 #define EIGEN_ALIGNOF(x) __alignof(x) 39 #elif EIGEN_HAS_ALIGNAS 40 #define EIGEN_ALIGN_TO_BOUNDARY(n) alignas(n) 41 #define EIGEN_ALIGNOF(x) alignof(x) 42 #elif EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM 43 #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n))) 44 #define EIGEN_ALIGNOF(x) __alignof(x) 45 #elif EIGEN_COMP_MSVC 46 #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n)) 47 #define EIGEN_ALIGNOF(x) __alignof(x) 48 #elif EIGEN_COMP_SUNCC 49 // FIXME not sure about this one: 50 #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n))) 51 #define EIGEN_ALIGNOF(x) __alignof(x) 52 #else 53 #error Please tell me what is the equivalent of alignas(n) and alignof(x) for your compiler 54 #endif 55 56 // If the user explicitly disable vectorization, then we also disable alignment 57 #if defined(EIGEN_DONT_VECTORIZE) 58 #if defined(EIGEN_GPUCC) 59 // GPU code is always vectorized and requires memory alignment for 60 // statically allocated buffers. 61 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16 62 #else 63 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 0 64 #endif 65 #elif defined(__AVX512F__) 66 // 64 bytes static alignment is preferred only if really required 67 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 64 68 #elif defined(__AVX__) 69 // 32 bytes static alignment is preferred only if really required 70 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 32 71 #else 72 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16 73 #endif 74 75 76 // EIGEN_MIN_ALIGN_BYTES defines the minimal value for which the notion of explicit alignment makes sense 77 #define EIGEN_MIN_ALIGN_BYTES 16 78 79 // Defined the boundary (in bytes) on which the data needs to be aligned. Note 80 // that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be 81 // aligned at all regardless of the value of this #define. 
82 83 #if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)) && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && EIGEN_MAX_STATIC_ALIGN_BYTES>0 84 #error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY. 85 #endif 86 87 // EIGEN_DONT_ALIGN_STATICALLY and EIGEN_DONT_ALIGN are deprecated 88 // They imply EIGEN_MAX_STATIC_ALIGN_BYTES=0 89 #if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN) 90 #ifdef EIGEN_MAX_STATIC_ALIGN_BYTES 91 #undef EIGEN_MAX_STATIC_ALIGN_BYTES 92 #endif 93 #define EIGEN_MAX_STATIC_ALIGN_BYTES 0 94 #endif 95 96 #ifndef EIGEN_MAX_STATIC_ALIGN_BYTES 97 98 // Try to automatically guess what is the best default value for EIGEN_MAX_STATIC_ALIGN_BYTES 99 100 // 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable 101 // 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always 102 // enable alignment, but it can be a cause of problems on some platforms, so we just disable it in 103 // certain common platform (compiler+architecture combinations) to avoid these problems. 104 // Only static alignment is really problematic (relies on nonstandard compiler extensions), 105 // try to keep heap alignment even when we have to disable static alignment. 106 #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64 || EIGEN_ARCH_MIPS) 107 #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1 108 #elif EIGEN_ARCH_ARM_OR_ARM64 && EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_MOST(4, 6) 109 // Old versions of GCC on ARM, at least 4.4, were once seen to have buggy static alignment support. 110 // Not sure which version fixed it, hopefully it doesn't affect 4.7, which is still somewhat in use. 111 // 4.8 and newer seem definitely unaffected. 
112 #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1 113 #else 114 #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0 115 #endif 116 117 // static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX 118 #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \ 119 && !EIGEN_GCC3_OR_OLDER \ 120 && !EIGEN_COMP_SUNCC \ 121 && !EIGEN_OS_QNX 122 #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1 123 #else 124 #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0 125 #endif 126 127 #if EIGEN_ARCH_WANTS_STACK_ALIGNMENT 128 #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES 129 #else 130 #define EIGEN_MAX_STATIC_ALIGN_BYTES 0 131 #endif 132 133 #endif 134 135 // If EIGEN_MAX_ALIGN_BYTES is defined, then it is considered as an upper bound for EIGEN_MAX_STATIC_ALIGN_BYTES 136 #if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES<EIGEN_MAX_STATIC_ALIGN_BYTES 137 #undef EIGEN_MAX_STATIC_ALIGN_BYTES 138 #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES 139 #endif 140 141 #if EIGEN_MAX_STATIC_ALIGN_BYTES==0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT) 142 #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT 143 #endif 144 145 // At this stage, EIGEN_MAX_STATIC_ALIGN_BYTES>0 is the true test whether we want to align arrays on the stack or not. 146 // It takes into account both the user choice to explicitly enable/disable alignment (by setting EIGEN_MAX_STATIC_ALIGN_BYTES) 147 // and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT). 148 // Henceforth, only EIGEN_MAX_STATIC_ALIGN_BYTES should be used. 


// Convenience shortcuts built on EIGEN_ALIGN_TO_BOUNDARY.
#define EIGEN_ALIGN8  EIGEN_ALIGN_TO_BOUNDARY(8)
#define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
#define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)
#define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64)
// EIGEN_ALIGN_MAX expands to nothing when static alignment is disabled.
#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
  #define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES)
#else
  #define EIGEN_ALIGN_MAX
#endif


// Dynamic alignment control

// EIGEN_DONT_ALIGN forces EIGEN_MAX_ALIGN_BYTES to 0 and is rejected when combined with a
// strictly positive user-provided EIGEN_MAX_ALIGN_BYTES. Without either setting, the
// ideal alignment for the enabled instruction set is used.
#if defined(EIGEN_DONT_ALIGN)
  #if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES>0
    #error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN.
  #endif
  // #undef on a macro that is not defined is a no-op, so no #ifdef guard is needed.
  #undef EIGEN_MAX_ALIGN_BYTES
  #define EIGEN_MAX_ALIGN_BYTES 0
#elif !defined(EIGEN_MAX_ALIGN_BYTES)
  #define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
#endif

// EIGEN_DEFAULT_ALIGN_BYTES is the larger of the ideal and the maximal alignment.
#if EIGEN_IDEAL_MAX_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES
  #define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
#else
  #define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
#endif


// EIGEN_UNALIGNED_VECTORIZE defaults to 1 unless the user provides a value.
#if !defined(EIGEN_UNALIGNED_VECTORIZE)
  #define EIGEN_UNALIGNED_VECTORIZE 1
#endif

//----------------------------------------------------------------------

// If alignment is disabled, then vectorization is disabled too. EIGEN_MAX_ALIGN_BYTES is
// the proper check: it accounts for both the user's will (EIGEN_MAX_ALIGN_BYTES,
// EIGEN_DONT_ALIGN) and our own platform checks.
#if EIGEN_MAX_ALIGN_BYTES==0 && !defined(EIGEN_DONT_VECTORIZE)
  #define EIGEN_DONT_VECTORIZE
#endif


// The following (except #include <malloc.h> and _M_IX86_FP ??) can likely be
// removed as gcc 4.1 and msvc 2008 are not supported anyways.
202 #if EIGEN_COMP_MSVC 203 #include <malloc.h> // for _aligned_malloc -- need it regardless of whether vectorization is enabled 204 #if (EIGEN_COMP_MSVC >= 1500) // 2008 or later 205 // a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP. 206 #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64 207 #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER 208 #endif 209 #endif 210 #else 211 #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) ) 212 #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC 213 #endif 214 #endif 215 216 #if !(defined(EIGEN_DONT_VECTORIZE) || defined(EIGEN_GPUCC)) 217 218 #if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER) 219 220 // Defines symbols for compile-time detection of which instructions are 221 // used. 222 // EIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used 223 #define EIGEN_VECTORIZE 224 #define EIGEN_VECTORIZE_SSE 225 #define EIGEN_VECTORIZE_SSE2 226 227 // Detect sse3/ssse3/sse4: 228 // gcc and icc defines __SSE3__, ... 229 // there is no way to know about this on msvc. You can define EIGEN_VECTORIZE_SSE* if you 230 // want to force the use of those instructions with msvc. 
231 #ifdef __SSE3__ 232 #define EIGEN_VECTORIZE_SSE3 233 #endif 234 #ifdef __SSSE3__ 235 #define EIGEN_VECTORIZE_SSSE3 236 #endif 237 #ifdef __SSE4_1__ 238 #define EIGEN_VECTORIZE_SSE4_1 239 #endif 240 #ifdef __SSE4_2__ 241 #define EIGEN_VECTORIZE_SSE4_2 242 #endif 243 #ifdef __AVX__ 244 #ifndef EIGEN_USE_SYCL 245 #define EIGEN_VECTORIZE_AVX 246 #endif 247 #define EIGEN_VECTORIZE_SSE3 248 #define EIGEN_VECTORIZE_SSSE3 249 #define EIGEN_VECTORIZE_SSE4_1 250 #define EIGEN_VECTORIZE_SSE4_2 251 #endif 252 #ifdef __AVX2__ 253 #ifndef EIGEN_USE_SYCL 254 #define EIGEN_VECTORIZE_AVX2 255 #define EIGEN_VECTORIZE_AVX 256 #endif 257 #define EIGEN_VECTORIZE_SSE3 258 #define EIGEN_VECTORIZE_SSSE3 259 #define EIGEN_VECTORIZE_SSE4_1 260 #define EIGEN_VECTORIZE_SSE4_2 261 #endif 262 #if defined(__FMA__) || (EIGEN_COMP_MSVC && defined(__AVX2__)) 263 // MSVC does not expose a switch dedicated for FMA 264 // For MSVC, AVX2 => FMA 265 #define EIGEN_VECTORIZE_FMA 266 #endif 267 #if defined(__AVX512F__) 268 #ifndef EIGEN_VECTORIZE_FMA 269 #if EIGEN_COMP_GNUC 270 #error Please add -mfma to your compiler flags: compiling with -mavx512f alone without SSE/AVX FMA is not supported (bug 1638). 271 #else 272 #error Please enable FMA in your compiler flags (e.g. -mfma): compiling with AVX512 alone without SSE/AVX FMA is not supported (bug 1638). 
273 #endif 274 #endif 275 #ifndef EIGEN_USE_SYCL 276 #define EIGEN_VECTORIZE_AVX512 277 #define EIGEN_VECTORIZE_AVX2 278 #define EIGEN_VECTORIZE_AVX 279 #endif 280 #define EIGEN_VECTORIZE_FMA 281 #define EIGEN_VECTORIZE_SSE3 282 #define EIGEN_VECTORIZE_SSSE3 283 #define EIGEN_VECTORIZE_SSE4_1 284 #define EIGEN_VECTORIZE_SSE4_2 285 #ifndef EIGEN_USE_SYCL 286 #ifdef __AVX512DQ__ 287 #define EIGEN_VECTORIZE_AVX512DQ 288 #endif 289 #ifdef __AVX512ER__ 290 #define EIGEN_VECTORIZE_AVX512ER 291 #endif 292 #ifdef __AVX512BF16__ 293 #define EIGEN_VECTORIZE_AVX512BF16 294 #endif 295 #endif 296 #endif 297 298 // Disable AVX support on broken xcode versions 299 #if defined(__apple_build_version__) && (__apple_build_version__ == 11000033 ) && ( __MAC_OS_X_VERSION_MIN_REQUIRED == 101500 ) 300 // A nasty bug in the clang compiler shipped with xcode in a common compilation situation 301 // when XCode 11.0 and Mac deployment target macOS 10.15 is https://trac.macports.org/ticket/58776#no1 302 #ifdef EIGEN_VECTORIZE_AVX 303 #undef EIGEN_VECTORIZE_AVX 304 #warning "Disabling AVX support: clang compiler shipped with XCode 11.[012] generates broken assembly with -macosx-version-min=10.15 and AVX enabled. 
" 305 #ifdef EIGEN_VECTORIZE_AVX2 306 #undef EIGEN_VECTORIZE_AVX2 307 #endif 308 #ifdef EIGEN_VECTORIZE_FMA 309 #undef EIGEN_VECTORIZE_FMA 310 #endif 311 #ifdef EIGEN_VECTORIZE_AVX512 312 #undef EIGEN_VECTORIZE_AVX512 313 #endif 314 #ifdef EIGEN_VECTORIZE_AVX512DQ 315 #undef EIGEN_VECTORIZE_AVX512DQ 316 #endif 317 #ifdef EIGEN_VECTORIZE_AVX512ER 318 #undef EIGEN_VECTORIZE_AVX512ER 319 #endif 320 #endif 321 // NOTE: Confirmed test failures in XCode 11.0, and XCode 11.2 with -macosx-version-min=10.15 and AVX 322 // NOTE using -macosx-version-min=10.15 with Xcode 11.0 results in runtime segmentation faults in many tests, 11.2 produce core dumps in 3 tests 323 // NOTE using -macosx-version-min=10.14 produces functioning and passing tests in all cases 324 // NOTE __clang_version__ "11.0.0 (clang-1100.0.33.8)" XCode 11.0 <- Produces many segfault and core dumping tests 325 // with -macosx-version-min=10.15 and AVX 326 // NOTE __clang_version__ "11.0.0 (clang-1100.0.33.12)" XCode 11.2 <- Produces 3 core dumping tests with 327 // -macosx-version-min=10.15 and AVX 328 #endif 329 330 // include files 331 332 // This extern "C" works around a MINGW-w64 compilation issue 333 // https://sourceforge.net/tracker/index.php?func=detail&aid=3018394&group_id=202880&atid=983354 334 // In essence, intrin.h is included by windows.h and also declares intrinsics (just as emmintrin.h etc. below do). 335 // However, intrin.h uses an extern "C" declaration, and g++ thus complains of duplicate declarations 336 // with conflicting linkage. The linkage for intrinsics doesn't matter, but at that stage the compiler doesn't know; 337 // so, to avoid compile errors when windows.h is included after Eigen/Core, ensure intrinsics are extern "C" here too. 338 // notice that since these are C headers, the extern "C" is theoretically needed anyways. 339 extern "C" { 340 // In theory we should only include immintrin.h and not the other *mmintrin.h header files directly. 
341 // Doing so triggers some issues with ICC. However old gcc versions seems to not have this file, thus: 342 #if EIGEN_COMP_ICC >= 1110 343 #include <immintrin.h> 344 #else 345 #include <mmintrin.h> 346 #include <emmintrin.h> 347 #include <xmmintrin.h> 348 #ifdef EIGEN_VECTORIZE_SSE3 349 #include <pmmintrin.h> 350 #endif 351 #ifdef EIGEN_VECTORIZE_SSSE3 352 #include <tmmintrin.h> 353 #endif 354 #ifdef EIGEN_VECTORIZE_SSE4_1 355 #include <smmintrin.h> 356 #endif 357 #ifdef EIGEN_VECTORIZE_SSE4_2 358 #include <nmmintrin.h> 359 #endif 360 #if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512) 361 #include <immintrin.h> 362 #endif 363 #endif 364 } // end extern "C" 365 366 #elif defined __VSX__ 367 368 #define EIGEN_VECTORIZE 369 #define EIGEN_VECTORIZE_VSX 370 #include <altivec.h> 371 // We need to #undef all these ugly tokens defined in <altivec.h> 372 // => use __vector instead of vector 373 #undef bool 374 #undef vector 375 #undef pixel 376 377 #elif defined __ALTIVEC__ 378 379 #define EIGEN_VECTORIZE 380 #define EIGEN_VECTORIZE_ALTIVEC 381 #include <altivec.h> 382 // We need to #undef all these ugly tokens defined in <altivec.h> 383 // => use __vector instead of vector 384 #undef bool 385 #undef vector 386 #undef pixel 387 388 #elif ((defined __ARM_NEON) || (defined __ARM_NEON__)) && !(defined EIGEN_ARM64_USE_SVE) 389 390 #define EIGEN_VECTORIZE 391 #define EIGEN_VECTORIZE_NEON 392 #include <arm_neon.h> 393 394 // We currently require SVE to be enabled explicitly via EIGEN_ARM64_USE_SVE and 395 // will not select the backend automatically 396 #elif (defined __ARM_FEATURE_SVE) && (defined EIGEN_ARM64_USE_SVE) 397 398 #define EIGEN_VECTORIZE 399 #define EIGEN_VECTORIZE_SVE 400 #include <arm_sve.h> 401 402 // Since we depend on knowing SVE vector lengths at compile-time, we need 403 // to ensure a fixed lengths is set 404 #if defined __ARM_FEATURE_SVE_BITS 405 #define EIGEN_ARM64_SVE_VL __ARM_FEATURE_SVE_BITS 406 #else 407 #error "Eigen requires a 
fixed SVE lector length but EIGEN_ARM64_SVE_VL is not set." 408 #endif 409 410 #elif (defined __s390x__ && defined __VEC__) 411 412 #define EIGEN_VECTORIZE 413 #define EIGEN_VECTORIZE_ZVECTOR 414 #include <vecintrin.h> 415 416 #elif defined __mips_msa 417 418 // Limit MSA optimizations to little-endian CPUs for now. 419 // TODO: Perhaps, eventually support MSA optimizations on big-endian CPUs? 420 #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) 421 #if defined(__LP64__) 422 #define EIGEN_MIPS_64 423 #else 424 #define EIGEN_MIPS_32 425 #endif 426 #define EIGEN_VECTORIZE 427 #define EIGEN_VECTORIZE_MSA 428 #include <msa.h> 429 #endif 430 431 #endif 432 #endif 433 434 // Following the Arm ACLE arm_neon.h should also include arm_fp16.h but not all 435 // compilers seem to follow this. We therefore include it explicitly. 436 // See also: https://bugs.llvm.org/show_bug.cgi?id=47955 437 #if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC) 438 #include <arm_fp16.h> 439 #endif 440 441 #if defined(__F16C__) && (!defined(EIGEN_GPUCC) && (!defined(EIGEN_COMP_CLANG) || EIGEN_COMP_CLANG>=380)) 442 // We can use the optimized fp16 to float and float to fp16 conversion routines 443 #define EIGEN_HAS_FP16_C 444 445 #if defined(EIGEN_COMP_CLANG) 446 // Workaround for clang: The FP16C intrinsics for clang are included by 447 // immintrin.h, as opposed to emmintrin.h as suggested by Intel: 448 // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#othertechs=FP16C&expand=1711 449 #include <immintrin.h> 450 #endif 451 #endif 452 453 #if defined EIGEN_CUDACC 454 #define EIGEN_VECTORIZE_GPU 455 #include <vector_types.h> 456 #if EIGEN_CUDA_SDK_VER >= 70500 457 #define EIGEN_HAS_CUDA_FP16 458 #endif 459 #endif 460 461 #if defined(EIGEN_HAS_CUDA_FP16) 462 #include <cuda_runtime_api.h> 463 #include <cuda_fp16.h> 464 #endif 465 466 #if defined(EIGEN_HIPCC) 467 #define EIGEN_VECTORIZE_GPU 468 #include <hip/hip_vector_types.h> 469 #define EIGEN_HAS_HIP_FP16 
470 #include <hip/hip_fp16.h> 471 #endif 472 473 474 /** \brief Namespace containing all symbols from the %Eigen library. */ 475 namespace Eigen { 476 477 inline static const char *SimdInstructionSetsInUse(void) { 478 #if defined(EIGEN_VECTORIZE_AVX512) 479 return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; 480 #elif defined(EIGEN_VECTORIZE_AVX) 481 return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; 482 #elif defined(EIGEN_VECTORIZE_SSE4_2) 483 return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; 484 #elif defined(EIGEN_VECTORIZE_SSE4_1) 485 return "SSE, SSE2, SSE3, SSSE3, SSE4.1"; 486 #elif defined(EIGEN_VECTORIZE_SSSE3) 487 return "SSE, SSE2, SSE3, SSSE3"; 488 #elif defined(EIGEN_VECTORIZE_SSE3) 489 return "SSE, SSE2, SSE3"; 490 #elif defined(EIGEN_VECTORIZE_SSE2) 491 return "SSE, SSE2"; 492 #elif defined(EIGEN_VECTORIZE_ALTIVEC) 493 return "AltiVec"; 494 #elif defined(EIGEN_VECTORIZE_VSX) 495 return "VSX"; 496 #elif defined(EIGEN_VECTORIZE_NEON) 497 return "ARM NEON"; 498 #elif defined(EIGEN_VECTORIZE_SVE) 499 return "ARM SVE"; 500 #elif defined(EIGEN_VECTORIZE_ZVECTOR) 501 return "S390X ZVECTOR"; 502 #elif defined(EIGEN_VECTORIZE_MSA) 503 return "MIPS MSA"; 504 #else 505 return "None"; 506 #endif 507 } 508 509 } // end namespace Eigen 510 511 512 #endif // EIGEN_CONFIGURE_VECTORIZATION_H