cart-elc

Source code for CART-ELC
git clone git://git.laack.co/cart-elc.git
Log | Files | Refs | README | LICENSE

MathFunctions.h (8024B)


      1 // This file is part of Eigen, a lightweight C++ template library
      2 // for linear algebra.
      3 //
      4 // Copyright (C) 2007 Julien Pommier
      5 // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
      6 // Copyright (C) 2016 Konstantinos Margaritis <markos@freevec.org>
      7 //
      8 // This Source Code Form is subject to the terms of the Mozilla
      9 // Public License v. 2.0. If a copy of the MPL was not distributed
     10 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
     11 
     12 /* The sin, cos, exp, and log functions of this file come from
     13  * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
     14  */
     15 
     16 #ifndef EIGEN_MATH_FUNCTIONS_ALTIVEC_H
     17 #define EIGEN_MATH_FUNCTIONS_ALTIVEC_H
     18 
     19 namespace Eigen {
     20 
     21 namespace internal {
     22 
     23 #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)
     24 static _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
     25 static _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
     26 static _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
     27 static _EIGEN_DECLARE_CONST_Packet4i(23, 23);
     28 
     29 static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);
     30 
     31 /* the smallest non denormalized float number */
     32 static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos,  0x00800000);
     33 static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf,     0xff800000); // -1.f/0.f
     34 static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_nan,     0xffffffff);
     35   
     36 /* natural logarithm computed for 4 simultaneous float
     37   return NaN for x <= 0
     38 */
     39 static _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
     40 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f);
     41 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f);
     42 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f);
     43 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f);
     44 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f);
     45 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f);
     46 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f);
     47 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f);
     48 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f);
     49 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
     50 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
     51 
     52 static _EIGEN_DECLARE_CONST_Packet4f(exp_hi,  88.3762626647950f);
     53 static _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
     54 
     55 static _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
     56 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
     57 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
     58 
     59 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
     60 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
     61 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
     62 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
     63 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
     64 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
     65 #endif
     66 
     67 static _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
     68 static _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
     69 static _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
     70 
     71 static _EIGEN_DECLARE_CONST_Packet2d(exp_hi,  709.437);
     72 static _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
     73 
     74 static _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
     75 
     76 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
     77 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
     78 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
     79 
     80 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
     81 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
     82 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
     83 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
     84 
     85 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
     86 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
     87 
     88 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
     89 Packet2d pexp<Packet2d>(const Packet2d& _x)
     90 {
     91   Packet2d x = _x;
     92 
     93   Packet2d tmp, fx;
     94   Packet2l emm0;
     95 
     96   // clamp x
     97   x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);
     98   /* express exp(x) as exp(g + n*log(2)) */
     99   fx = pmadd(p2d_cephes_LOG2EF, x, p2d_half);
    100 
    101   fx = vec_floor(fx);
    102 
    103   tmp = pmul(fx, p2d_cephes_exp_C1);
    104   Packet2d z = pmul(fx, p2d_cephes_exp_C2);
    105   x = psub(x, tmp);
    106   x = psub(x, z);
    107 
    108   Packet2d x2 = pmul(x,x);
    109 
    110   Packet2d px = p2d_cephes_exp_p0;
    111   px = pmadd(px, x2, p2d_cephes_exp_p1);
    112   px = pmadd(px, x2, p2d_cephes_exp_p2);
    113   px = pmul (px, x);
    114 
    115   Packet2d qx = p2d_cephes_exp_q0;
    116   qx = pmadd(qx, x2, p2d_cephes_exp_q1);
    117   qx = pmadd(qx, x2, p2d_cephes_exp_q2);
    118   qx = pmadd(qx, x2, p2d_cephes_exp_q3);
    119 
    120   x = pdiv(px,psub(qx,px));
    121   x = pmadd(p2d_2,x,p2d_1);
    122 
    123   // build 2^n
    124   emm0 = vec_ctsl(fx, 0);
    125 
    126   static const Packet2l p2l_1023 = { 1023, 1023 };
    127   static const Packet2ul p2ul_52 = { 52, 52 };
    128 
    129   emm0 = emm0 + p2l_1023;
    130   emm0 = emm0 << reinterpret_cast<Packet2l>(p2ul_52);
    131 
    132   // Altivec's max & min operators just drop silent NaNs. Check NaNs in 
    133   // inputs and return them unmodified.
    134   Packet2ul isnumber_mask = reinterpret_cast<Packet2ul>(vec_cmpeq(_x, _x));
    135   return vec_sel(_x, pmax(pmul(x, reinterpret_cast<Packet2d>(emm0)), _x),
    136                  isnumber_mask);
    137 }
    138 
    139 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
    140 Packet4f pexp<Packet4f>(const Packet4f& _x)
    141 {
    142 #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)
    143   Packet4f x = _x;
    144 
    145   Packet4f tmp, fx;
    146   Packet4i emm0;
    147 
    148   // clamp x
    149   x = pmax(pmin(x, p4f_exp_hi), p4f_exp_lo);
    150 
    151   // express exp(x) as exp(g + n*log(2))
    152   fx = pmadd(x, p4f_cephes_LOG2EF, p4f_half);
    153 
    154   fx = pfloor(fx);
    155 
    156   tmp = pmul(fx, p4f_cephes_exp_C1);
    157   Packet4f z = pmul(fx, p4f_cephes_exp_C2);
    158   x = psub(x, tmp);
    159   x = psub(x, z);
    160 
    161   z = pmul(x,x);
    162 
    163   Packet4f y = p4f_cephes_exp_p0;
    164   y = pmadd(y, x, p4f_cephes_exp_p1);
    165   y = pmadd(y, x, p4f_cephes_exp_p2);
    166   y = pmadd(y, x, p4f_cephes_exp_p3);
    167   y = pmadd(y, x, p4f_cephes_exp_p4);
    168   y = pmadd(y, x, p4f_cephes_exp_p5);
    169   y = pmadd(y, z, x);
    170   y = padd(y, p4f_1);
    171 
    172   // build 2^n
    173   emm0 = (Packet4i){ (int)fx[0], (int)fx[1], (int)fx[2], (int)fx[3] };
    174   emm0 = emm0 + p4i_0x7f;
    175   emm0 = emm0 << reinterpret_cast<Packet4i>(p4i_23);
    176 
    177   return pmax(pmul(y, reinterpret_cast<Packet4f>(emm0)), _x);
    178 #else
    179   Packet4f res;
    180   res.v4f[0] = pexp<Packet2d>(_x.v4f[0]);
    181   res.v4f[1] = pexp<Packet2d>(_x.v4f[1]);
    182   return res;
    183 #endif
    184 }
    185 
    186 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
    187 Packet2d psqrt<Packet2d>(const Packet2d& x)
    188 {
    189   return vec_sqrt(x);
    190 }
    191 
    192 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
    193 Packet4f psqrt<Packet4f>(const Packet4f& x)
    194 {
    195   Packet4f res;
    196 #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)
    197   res = vec_sqrt(x);
    198 #else
    199   res.v4f[0] = psqrt<Packet2d>(x.v4f[0]);
    200   res.v4f[1] = psqrt<Packet2d>(x.v4f[1]);
    201 #endif
    202   return res;
    203 }
    204 
    205 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
    206 Packet2d prsqrt<Packet2d>(const Packet2d& x) {
    207   return pset1<Packet2d>(1.0) / psqrt<Packet2d>(x);
    208 }
    209 
    210 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
    211 Packet4f prsqrt<Packet4f>(const Packet4f& x) {
    212   Packet4f res;
    213 #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)
    214   res = pset1<Packet4f>(1.0) / psqrt<Packet4f>(x);
    215 #else
    216   res.v4f[0] = prsqrt<Packet2d>(x.v4f[0]);
    217   res.v4f[1] = prsqrt<Packet2d>(x.v4f[1]);
    218 #endif
    219   return res;
    220 }
    221 
    222 // Hyperbolic Tangent function.
    223 template <>
    224 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
    225 ptanh<Packet4f>(const Packet4f& x) {
    226   return internal::generic_fast_tanh_float(x);
    227 }
    228 
    229 }  // end namespace internal
    230 
    231 }  // end namespace Eigen
    232 
    233 #endif  // EIGEN_MATH_FUNCTIONS_ALTIVEC_H