PartialReduxEvaluator.h (9207B)
1 // This file is part of Eigen, a lightweight C++ template library 2 // for linear algebra. 3 // 4 // Copyright (C) 2011-2018 Gael Guennebaud <gael.guennebaud@inria.fr> 5 // 6 // This Source Code Form is subject to the terms of the Mozilla 7 // Public License v. 2.0. If a copy of the MPL was not distributed 8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 9 10 #ifndef EIGEN_PARTIALREDUX_H 11 #define EIGEN_PARTIALREDUX_H 12 13 namespace Eigen { 14 15 namespace internal { 16 17 18 /*************************************************************************** 19 * 20 * This file provides evaluators for partial reductions. 21 * There are two modes: 22 * 23 * - scalar path: simply calls the respective function on the column or row. 24 * -> nothing special here, all the tricky part is handled by the return 25 * types of VectorwiseOp's members. They embed the functor calling the 26 * respective DenseBase's member function. 27 * 28 * - vectorized path: implements a packet-wise reductions followed by 29 * some (optional) processing of the outcome, e.g., division by n for mean. 30 * 31 * For the vectorized path let's observe that the packet-size and outer-unrolling 32 * are both decided by the assignement logic. So all we have to do is to decide 33 * on the inner unrolling. 34 * 35 * For the unrolling, we can reuse "internal::redux_vec_unroller" from Redux.h, 36 * but be need to be careful to specify correct increment. 37 * 38 ***************************************************************************/ 39 40 41 /* logic deciding a strategy for unrolling of vectorized paths */ 42 template<typename Func, typename Evaluator> 43 struct packetwise_redux_traits 44 { 45 enum { 46 OuterSize = int(Evaluator::IsRowMajor) ? Evaluator::RowsAtCompileTime : Evaluator::ColsAtCompileTime, 47 Cost = OuterSize == Dynamic ? HugeCost 48 : OuterSize * Evaluator::CoeffReadCost + (OuterSize-1) * functor_traits<Func>::Cost, 49 Unrolling = Cost <= EIGEN_UNROLLING_LIMIT ? CompleteUnrolling : NoUnrolling 50 }; 51 52 }; 53 54 /* Value to be returned when size==0 , by default let's return 0 */ 55 template<typename PacketType,typename Func> 56 EIGEN_DEVICE_FUNC 57 PacketType packetwise_redux_empty_value(const Func& ) { return pset1<PacketType>(0); } 58 59 /* For products the default is 1 */ 60 template<typename PacketType,typename Scalar> 61 EIGEN_DEVICE_FUNC 62 PacketType packetwise_redux_empty_value(const scalar_product_op<Scalar,Scalar>& ) { return pset1<PacketType>(1); } 63 64 /* Perform the actual reduction */ 65 template<typename Func, typename Evaluator, 66 int Unrolling = packetwise_redux_traits<Func, Evaluator>::Unrolling 67 > 68 struct packetwise_redux_impl; 69 70 /* Perform the actual reduction with unrolling */ 71 template<typename Func, typename Evaluator> 72 struct packetwise_redux_impl<Func, Evaluator, CompleteUnrolling> 73 { 74 typedef redux_novec_unroller<Func,Evaluator, 0, Evaluator::SizeAtCompileTime> Base; 75 typedef typename Evaluator::Scalar Scalar; 76 77 template<typename PacketType> 78 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE 79 PacketType run(const Evaluator &eval, const Func& func, Index /*size*/) 80 { 81 return redux_vec_unroller<Func, Evaluator, 0, packetwise_redux_traits<Func, Evaluator>::OuterSize>::template run<PacketType>(eval,func); 82 } 83 }; 84 85 /* Add a specialization of redux_vec_unroller for size==0 at compiletime. 86 * This specialization is not required for general reductions, which is 87 * why it is defined here. 88 */ 89 template<typename Func, typename Evaluator, int Start> 90 struct redux_vec_unroller<Func, Evaluator, Start, 0> 91 { 92 template<typename PacketType> 93 EIGEN_DEVICE_FUNC 94 static EIGEN_STRONG_INLINE PacketType run(const Evaluator &, const Func& f) 95 { 96 return packetwise_redux_empty_value<PacketType>(f); 97 } 98 }; 99 100 /* Perform the actual reduction for dynamic sizes */ 101 template<typename Func, typename Evaluator> 102 struct packetwise_redux_impl<Func, Evaluator, NoUnrolling> 103 { 104 typedef typename Evaluator::Scalar Scalar; 105 typedef typename redux_traits<Func, Evaluator>::PacketType PacketScalar; 106 107 template<typename PacketType> 108 EIGEN_DEVICE_FUNC 109 static PacketType run(const Evaluator &eval, const Func& func, Index size) 110 { 111 if(size==0) 112 return packetwise_redux_empty_value<PacketType>(func); 113 114 const Index size4 = (size-1)&(~3); 115 PacketType p = eval.template packetByOuterInner<Unaligned,PacketType>(0,0); 116 Index i = 1; 117 // This loop is optimized for instruction pipelining: 118 // - each iteration generates two independent instructions 119 // - thanks to branch prediction and out-of-order execution we have independent instructions across loops 120 for(; i<size4; i+=4) 121 p = func.packetOp(p, 122 func.packetOp( 123 func.packetOp(eval.template packetByOuterInner<Unaligned,PacketType>(i+0,0),eval.template packetByOuterInner<Unaligned,PacketType>(i+1,0)), 124 func.packetOp(eval.template packetByOuterInner<Unaligned,PacketType>(i+2,0),eval.template packetByOuterInner<Unaligned,PacketType>(i+3,0)))); 125 for(; i<size; ++i) 126 p = func.packetOp(p, eval.template packetByOuterInner<Unaligned,PacketType>(i,0)); 127 return p; 128 } 129 }; 130 131 template< typename ArgType, typename MemberOp, int Direction> 132 struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> > 133 : evaluator_base<PartialReduxExpr<ArgType, MemberOp, Direction> > 134 { 135 typedef PartialReduxExpr<ArgType, MemberOp, Direction> XprType; 136 typedef typename internal::nested_eval<ArgType,1>::type ArgTypeNested; 137 typedef typename internal::add_const_on_value_type<ArgTypeNested>::type ConstArgTypeNested; 138 typedef typename internal::remove_all<ArgTypeNested>::type ArgTypeNestedCleaned; 139 typedef typename ArgType::Scalar InputScalar; 140 typedef typename XprType::Scalar Scalar; 141 enum { 142 TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(ArgType::ColsAtCompileTime) 143 }; 144 typedef typename MemberOp::template Cost<int(TraversalSize)> CostOpType; 145 enum { 146 CoeffReadCost = TraversalSize==Dynamic ? HugeCost 147 : TraversalSize==0 ? 1 148 : int(TraversalSize) * int(evaluator<ArgType>::CoeffReadCost) + int(CostOpType::value), 149 150 _ArgFlags = evaluator<ArgType>::Flags, 151 152 _Vectorizable = bool(int(_ArgFlags)&PacketAccessBit) 153 && bool(MemberOp::Vectorizable) 154 && (Direction==int(Vertical) ? bool(_ArgFlags&RowMajorBit) : (_ArgFlags&RowMajorBit)==0) 155 && (TraversalSize!=0), 156 157 Flags = (traits<XprType>::Flags&RowMajorBit) 158 | (evaluator<ArgType>::Flags&(HereditaryBits&(~RowMajorBit))) 159 | (_Vectorizable ? PacketAccessBit : 0) 160 | LinearAccessBit, 161 162 Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized 163 }; 164 165 EIGEN_DEVICE_FUNC explicit evaluator(const XprType xpr) 166 : m_arg(xpr.nestedExpression()), m_functor(xpr.functor()) 167 { 168 EIGEN_INTERNAL_CHECK_COST_VALUE(TraversalSize==Dynamic ? HugeCost : (TraversalSize==0 ? 1 : int(CostOpType::value))); 169 EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); 170 } 171 172 typedef typename XprType::CoeffReturnType CoeffReturnType; 173 174 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 175 const Scalar coeff(Index i, Index j) const 176 { 177 return coeff(Direction==Vertical ? j : i); 178 } 179 180 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 181 const Scalar coeff(Index index) const 182 { 183 return m_functor(m_arg.template subVector<DirectionType(Direction)>(index)); 184 } 185 186 template<int LoadMode,typename PacketType> 187 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 188 PacketType packet(Index i, Index j) const 189 { 190 return packet<LoadMode,PacketType>(Direction==Vertical ? j : i); 191 } 192 193 template<int LoadMode,typename PacketType> 194 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC 195 PacketType packet(Index idx) const 196 { 197 enum { PacketSize = internal::unpacket_traits<PacketType>::size }; 198 typedef Block<const ArgTypeNestedCleaned, 199 Direction==Vertical ? int(ArgType::RowsAtCompileTime) : int(PacketSize), 200 Direction==Vertical ? int(PacketSize) : int(ArgType::ColsAtCompileTime), 201 true /* InnerPanel */> PanelType; 202 203 PanelType panel(m_arg, 204 Direction==Vertical ? 0 : idx, 205 Direction==Vertical ? idx : 0, 206 Direction==Vertical ? m_arg.rows() : Index(PacketSize), 207 Direction==Vertical ? Index(PacketSize) : m_arg.cols()); 208 209 // FIXME 210 // See bug 1612, currently if PacketSize==1 (i.e. complex<double> with 128bits registers) then the storage-order of panel get reversed 211 // and methods like packetByOuterInner do not make sense anymore in this context. 212 // So let's just by pass "vectorization" in this case: 213 if(PacketSize==1) 214 return internal::pset1<PacketType>(coeff(idx)); 215 216 typedef typename internal::redux_evaluator<PanelType> PanelEvaluator; 217 PanelEvaluator panel_eval(panel); 218 typedef typename MemberOp::BinaryOp BinaryOp; 219 PacketType p = internal::packetwise_redux_impl<BinaryOp,PanelEvaluator>::template run<PacketType>(panel_eval,m_functor.binaryFunc(),m_arg.outerSize()); 220 return p; 221 } 222 223 protected: 224 ConstArgTypeNested m_arg; 225 const MemberOp m_functor; 226 }; 227 228 } // end namespace internal 229 230 } // end namespace Eigen 231 232 #endif // EIGEN_PARTIALREDUX_H