cart-elc

Source code for CART-ELC
git clone git://git.laack.co/cart-elc.git
Log | Files | Refs | README | LICENSE

SparseLU.h (33316B)


      1 // This file is part of Eigen, a lightweight C++ template library
      2 // for linear algebra.
      3 //
      4 // Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
      5 // Copyright (C) 2012-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
      6 //
      7 // This Source Code Form is subject to the terms of the Mozilla
      8 // Public License v. 2.0. If a copy of the MPL was not distributed
      9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
     10 
     11 
     12 #ifndef EIGEN_SPARSE_LU_H
     13 #define EIGEN_SPARSE_LU_H
     14 
     15 namespace Eigen {
     16 
     17 template <typename _MatrixType, typename _OrderingType = COLAMDOrdering<typename _MatrixType::StorageIndex> > class SparseLU;
     18 template <typename MappedSparseMatrixType> struct SparseLUMatrixLReturnType;
     19 template <typename MatrixLType, typename MatrixUType> struct SparseLUMatrixUReturnType;
     20 
     21 template <bool Conjugate,class SparseLUType>
     22 class SparseLUTransposeView : public SparseSolverBase<SparseLUTransposeView<Conjugate,SparseLUType> >
     23 {
     24 protected:
     25   typedef SparseSolverBase<SparseLUTransposeView<Conjugate,SparseLUType> > APIBase;
     26   using APIBase::m_isInitialized;
     27 public:
     28   typedef typename SparseLUType::Scalar Scalar;
     29   typedef typename SparseLUType::StorageIndex StorageIndex;
     30   typedef typename SparseLUType::MatrixType MatrixType;
     31   typedef typename SparseLUType::OrderingType OrderingType;
     32 
     33   enum {
     34     ColsAtCompileTime = MatrixType::ColsAtCompileTime,
     35     MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
     36   };
     37 
     38   SparseLUTransposeView() : m_sparseLU(NULL) {}
     39   SparseLUTransposeView(const SparseLUTransposeView& view) {
     40     this->m_sparseLU = view.m_sparseLU;
     41   }
     42   void setIsInitialized(const bool isInitialized) {this->m_isInitialized = isInitialized;}
     43   void setSparseLU(SparseLUType* sparseLU) {m_sparseLU = sparseLU;}
     44   using APIBase::_solve_impl;
     45   template<typename Rhs, typename Dest>
     46   bool _solve_impl(const MatrixBase<Rhs> &B, MatrixBase<Dest> &X_base) const
     47   {
     48     Dest& X(X_base.derived());
     49     eigen_assert(m_sparseLU->info() == Success && "The matrix should be factorized first");
     50     EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0,
     51                         THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);
     52 
     53 
     54     // this ugly const_cast_derived() helps to detect aliasing when applying the permutations
     55     for(Index j = 0; j < B.cols(); ++j){
     56       X.col(j) = m_sparseLU->colsPermutation() * B.const_cast_derived().col(j);
     57     }
     58     //Forward substitution with transposed or adjoint of U
     59     m_sparseLU->matrixU().template solveTransposedInPlace<Conjugate>(X);
     60 
     61     //Backward substitution with transposed or adjoint of L
     62     m_sparseLU->matrixL().template solveTransposedInPlace<Conjugate>(X);
     63 
     64     // Permute back the solution
     65     for (Index j = 0; j < B.cols(); ++j)
     66       X.col(j) = m_sparseLU->rowsPermutation().transpose() * X.col(j);
     67     return true;
     68   }
     69   inline Index rows() const { return m_sparseLU->rows(); }
     70   inline Index cols() const { return m_sparseLU->cols(); }
     71 
     72 private:
     73   SparseLUType *m_sparseLU;
     74   SparseLUTransposeView& operator=(const SparseLUTransposeView&);
     75 };
     76 
     77 
     78 /** \ingroup SparseLU_Module
     79   * \class SparseLU
     80   * 
     81   * \brief Sparse supernodal LU factorization for general matrices
     82   * 
     83   * This class implements the supernodal LU factorization for general matrices.
     84   * It uses the main techniques from the sequential SuperLU package 
     85   * (http://crd-legacy.lbl.gov/~xiaoye/SuperLU/). It handles transparently real 
     86   * and complex arithmetic with single and double precision, depending on the 
     87   * scalar type of your input matrix. 
     88   * The code has been optimized to provide BLAS-3 operations during supernode-panel updates. 
     89   * It benefits directly from the built-in high-performant Eigen BLAS routines. 
     90   * Moreover, when the size of a supernode is very small, the BLAS calls are avoided to 
     91   * enable a better optimization from the compiler. For best performance, 
     92   * you should compile it with NDEBUG flag to avoid the numerous bounds checking on vectors. 
     93   * 
     94   * An important parameter of this class is the ordering method. It is used to reorder the columns 
     95   * (and eventually the rows) of the matrix to reduce the number of new elements that are created during 
     96   * numerical factorization. The cheapest method available is COLAMD. 
     97   * See  \link OrderingMethods_Module the OrderingMethods module \endlink for the list of 
     98   * built-in and external ordering methods. 
     99   *
    100   * Simple example with key steps 
    101   * \code
    102   * VectorXd x(n), b(n);
    103   * SparseMatrix<double> A;
    104   * SparseLU<SparseMatrix<double>, COLAMDOrdering<int> >   solver;
    105   * // fill A and b;
    106   * // Compute the ordering permutation vector from the structural pattern of A
    107   * solver.analyzePattern(A); 
    108   * // Compute the numerical factorization 
    109   * solver.factorize(A); 
    110   * //Use the factors to solve the linear system 
    111   * x = solver.solve(b); 
    112   * \endcode
    113   * 
    114   * \warning The input matrix A should be in a \b compressed and \b column-major form.
    115   * Otherwise an expensive copy will be made. You can call the inexpensive makeCompressed() to get a compressed matrix.
    116   * 
    117   * \note Unlike the initial SuperLU implementation, there is no step to equilibrate the matrix. 
    118   * For badly scaled matrices, this step can be useful to reduce the pivoting during factorization. 
    119   * If this is the case for your matrices, you can try the basic scaling method at
    120   *  "unsupported/Eigen/src/IterativeSolvers/Scaling.h"
    121   * 
    122   * \tparam _MatrixType The type of the sparse matrix. It must be a column-major SparseMatrix<>
    123   * \tparam _OrderingType The ordering method to use, either AMD, COLAMD or METIS. Default is COLMAD
    124   *
    125   * \implsparsesolverconcept
    126   * 
    127   * \sa \ref TutorialSparseSolverConcept
    128   * \sa \ref OrderingMethods_Module
    129   */
    130 template <typename _MatrixType, typename _OrderingType>
    131 class SparseLU : public SparseSolverBase<SparseLU<_MatrixType,_OrderingType> >, public internal::SparseLUImpl<typename _MatrixType::Scalar, typename _MatrixType::StorageIndex>
    132 {
    133   protected:
    134     typedef SparseSolverBase<SparseLU<_MatrixType,_OrderingType> > APIBase;
    135     using APIBase::m_isInitialized;
    136   public:
    137     using APIBase::_solve_impl;
    138     
    139     typedef _MatrixType MatrixType; 
    140     typedef _OrderingType OrderingType;
    141     typedef typename MatrixType::Scalar Scalar; 
    142     typedef typename MatrixType::RealScalar RealScalar; 
    143     typedef typename MatrixType::StorageIndex StorageIndex;
    144     typedef SparseMatrix<Scalar,ColMajor,StorageIndex> NCMatrix;
    145     typedef internal::MappedSuperNodalMatrix<Scalar, StorageIndex> SCMatrix;
    146     typedef Matrix<Scalar,Dynamic,1> ScalarVector;
    147     typedef Matrix<StorageIndex,Dynamic,1> IndexVector;
    148     typedef PermutationMatrix<Dynamic, Dynamic, StorageIndex> PermutationType;
    149     typedef internal::SparseLUImpl<Scalar, StorageIndex> Base;
    150 
    151     enum {
    152       ColsAtCompileTime = MatrixType::ColsAtCompileTime,
    153       MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
    154     };
    155     
    156   public:
    157 
    158     SparseLU():m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1)
    159     {
    160       initperfvalues(); 
    161     }
    162     explicit SparseLU(const MatrixType& matrix)
    163       : m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1)
    164     {
    165       initperfvalues(); 
    166       compute(matrix);
    167     }
    168     
    169     ~SparseLU()
    170     {
    171       // Free all explicit dynamic pointers 
    172     }
    173     
    174     void analyzePattern (const MatrixType& matrix);
    175     void factorize (const MatrixType& matrix);
    176     void simplicialfactorize(const MatrixType& matrix);
    177     
    178     /**
    179       * Compute the symbolic and numeric factorization of the input sparse matrix.
    180       * The input matrix should be in column-major storage. 
    181       */
    182     void compute (const MatrixType& matrix)
    183     {
    184       // Analyze 
    185       analyzePattern(matrix); 
    186       //Factorize
    187       factorize(matrix);
    188     } 
    189 
    190     /** \returns an expression of the transposed of the factored matrix.
    191       *
    192       * A typical usage is to solve for the transposed problem A^T x = b:
    193       * \code
    194       * solver.compute(A);
    195       * x = solver.transpose().solve(b);
    196       * \endcode
    197       *
    198       * \sa adjoint(), solve()
    199       */
    200     const SparseLUTransposeView<false,SparseLU<_MatrixType,_OrderingType> > transpose()
    201     {
    202       SparseLUTransposeView<false,  SparseLU<_MatrixType,_OrderingType> > transposeView;
    203       transposeView.setSparseLU(this);
    204       transposeView.setIsInitialized(this->m_isInitialized);
    205       return transposeView;
    206     }
    207 
    208 
    209     /** \returns an expression of the adjoint of the factored matrix
    210       *
    211       * A typical usage is to solve for the adjoint problem A' x = b:
    212       * \code
    213       * solver.compute(A);
    214       * x = solver.adjoint().solve(b);
    215       * \endcode
    216       *
    217       * For real scalar types, this function is equivalent to transpose().
    218       *
    219       * \sa transpose(), solve()
    220       */
    221     const SparseLUTransposeView<true, SparseLU<_MatrixType,_OrderingType> > adjoint()
    222     {
    223       SparseLUTransposeView<true,  SparseLU<_MatrixType,_OrderingType> > adjointView;
    224       adjointView.setSparseLU(this);
    225       adjointView.setIsInitialized(this->m_isInitialized);
    226       return adjointView;
    227     }
    228     
    229     inline Index rows() const { return m_mat.rows(); }
    230     inline Index cols() const { return m_mat.cols(); }
    231     /** Indicate that the pattern of the input matrix is symmetric */
    232     void isSymmetric(bool sym)
    233     {
    234       m_symmetricmode = sym;
    235     }
    236     
    237     /** \returns an expression of the matrix L, internally stored as supernodes
    238       * The only operation available with this expression is the triangular solve
    239       * \code
    240       * y = b; matrixL().solveInPlace(y);
    241       * \endcode
    242       */
    243     SparseLUMatrixLReturnType<SCMatrix> matrixL() const
    244     {
    245       return SparseLUMatrixLReturnType<SCMatrix>(m_Lstore);
    246     }
    247     /** \returns an expression of the matrix U,
    248       * The only operation available with this expression is the triangular solve
    249       * \code
    250       * y = b; matrixU().solveInPlace(y);
    251       * \endcode
    252       */
    253     SparseLUMatrixUReturnType<SCMatrix,MappedSparseMatrix<Scalar,ColMajor,StorageIndex> > matrixU() const
    254     {
    255       return SparseLUMatrixUReturnType<SCMatrix, MappedSparseMatrix<Scalar,ColMajor,StorageIndex> >(m_Lstore, m_Ustore);
    256     }
    257 
    258     /**
    259       * \returns a reference to the row matrix permutation \f$ P_r \f$ such that \f$P_r A P_c^T = L U\f$
    260       * \sa colsPermutation()
    261       */
    262     inline const PermutationType& rowsPermutation() const
    263     {
    264       return m_perm_r;
    265     }
    266     /**
    267       * \returns a reference to the column matrix permutation\f$ P_c^T \f$ such that \f$P_r A P_c^T = L U\f$
    268       * \sa rowsPermutation()
    269       */
    270     inline const PermutationType& colsPermutation() const
    271     {
    272       return m_perm_c;
    273     }
    274     /** Set the threshold used for a diagonal entry to be an acceptable pivot. */
    275     void setPivotThreshold(const RealScalar& thresh)
    276     {
    277       m_diagpivotthresh = thresh; 
    278     }
    279 
    280 #ifdef EIGEN_PARSED_BY_DOXYGEN
    281     /** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A.
    282       *
    283       * \warning the destination matrix X in X = this->solve(B) must be colmun-major.
    284       *
    285       * \sa compute()
    286       */
    287     template<typename Rhs>
    288     inline const Solve<SparseLU, Rhs> solve(const MatrixBase<Rhs>& B) const;
    289 #endif // EIGEN_PARSED_BY_DOXYGEN
    290     
    291     /** \brief Reports whether previous computation was successful.
    292       *
    293       * \returns \c Success if computation was successful,
    294       *          \c NumericalIssue if the LU factorization reports a problem, zero diagonal for instance
    295       *          \c InvalidInput if the input matrix is invalid
    296       *
    297       * \sa iparm()          
    298       */
    299     ComputationInfo info() const
    300     {
    301       eigen_assert(m_isInitialized && "Decomposition is not initialized.");
    302       return m_info;
    303     }
    304     
    305     /**
    306       * \returns A string describing the type of error
    307       */
    308     std::string lastErrorMessage() const
    309     {
    310       return m_lastError; 
    311     }
    312 
    313     template<typename Rhs, typename Dest>
    314     bool _solve_impl(const MatrixBase<Rhs> &B, MatrixBase<Dest> &X_base) const
    315     {
    316       Dest& X(X_base.derived());
    317       eigen_assert(m_factorizationIsOk && "The matrix should be factorized first");
    318       EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0,
    319                         THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);
    320       
    321       // Permute the right hand side to form X = Pr*B
    322       // on return, X is overwritten by the computed solution
    323       X.resize(B.rows(),B.cols());
    324 
    325       // this ugly const_cast_derived() helps to detect aliasing when applying the permutations
    326       for(Index j = 0; j < B.cols(); ++j)
    327         X.col(j) = rowsPermutation() * B.const_cast_derived().col(j);
    328       
    329       //Forward substitution with L
    330       this->matrixL().solveInPlace(X);
    331       this->matrixU().solveInPlace(X);
    332       
    333       // Permute back the solution 
    334       for (Index j = 0; j < B.cols(); ++j)
    335         X.col(j) = colsPermutation().inverse() * X.col(j);
    336       
    337       return true; 
    338     }
    339     
    340     /**
    341       * \returns the absolute value of the determinant of the matrix of which
    342       * *this is the QR decomposition.
    343       *
    344       * \warning a determinant can be very big or small, so for matrices
    345       * of large enough dimension, there is a risk of overflow/underflow.
    346       * One way to work around that is to use logAbsDeterminant() instead.
    347       *
    348       * \sa logAbsDeterminant(), signDeterminant()
    349       */
    350     Scalar absDeterminant()
    351     {
    352       using std::abs;
    353       eigen_assert(m_factorizationIsOk && "The matrix should be factorized first.");
    354       // Initialize with the determinant of the row matrix
    355       Scalar det = Scalar(1.);
    356       // Note that the diagonal blocks of U are stored in supernodes,
    357       // which are available in the  L part :)
    358       for (Index j = 0; j < this->cols(); ++j)
    359       {
    360         for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it)
    361         {
    362           if(it.index() == j)
    363           {
    364             det *= abs(it.value());
    365             break;
    366           }
    367         }
    368       }
    369       return det;
    370     }
    371 
    372     /** \returns the natural log of the absolute value of the determinant of the matrix
    373       * of which **this is the QR decomposition
    374       *
    375       * \note This method is useful to work around the risk of overflow/underflow that's
    376       * inherent to the determinant computation.
    377       *
    378       * \sa absDeterminant(), signDeterminant()
    379       */
    380     Scalar logAbsDeterminant() const
    381     {
    382       using std::log;
    383       using std::abs;
    384 
    385       eigen_assert(m_factorizationIsOk && "The matrix should be factorized first.");
    386       Scalar det = Scalar(0.);
    387       for (Index j = 0; j < this->cols(); ++j)
    388       {
    389         for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it)
    390         {
    391           if(it.row() < j) continue;
    392           if(it.row() == j)
    393           {
    394             det += log(abs(it.value()));
    395             break;
    396           }
    397         }
    398       }
    399       return det;
    400     }
    401 
    402     /** \returns A number representing the sign of the determinant
    403       *
    404       * \sa absDeterminant(), logAbsDeterminant()
    405       */
    406     Scalar signDeterminant()
    407     {
    408       eigen_assert(m_factorizationIsOk && "The matrix should be factorized first.");
    409       // Initialize with the determinant of the row matrix
    410       Index det = 1;
    411       // Note that the diagonal blocks of U are stored in supernodes,
    412       // which are available in the  L part :)
    413       for (Index j = 0; j < this->cols(); ++j)
    414       {
    415         for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it)
    416         {
    417           if(it.index() == j)
    418           {
    419             if(it.value()<0)
    420               det = -det;
    421             else if(it.value()==0)
    422               return 0;
    423             break;
    424           }
    425         }
    426       }
    427       return det * m_detPermR * m_detPermC;
    428     }
    429     
    430     /** \returns The determinant of the matrix.
    431       *
    432       * \sa absDeterminant(), logAbsDeterminant()
    433       */
    434     Scalar determinant()
    435     {
    436       eigen_assert(m_factorizationIsOk && "The matrix should be factorized first.");
    437       // Initialize with the determinant of the row matrix
    438       Scalar det = Scalar(1.);
    439       // Note that the diagonal blocks of U are stored in supernodes,
    440       // which are available in the  L part :)
    441       for (Index j = 0; j < this->cols(); ++j)
    442       {
    443         for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it)
    444         {
    445           if(it.index() == j)
    446           {
    447             det *= it.value();
    448             break;
    449           }
    450         }
    451       }
    452       return (m_detPermR * m_detPermC) > 0 ? det : -det;
    453     }
    454 
    455     Index nnzL() const { return m_nnzL; };
    456     Index nnzU() const { return m_nnzU; };
    457 
    458   protected:
    459     // Functions 
    460     void initperfvalues()
    461     {
    462       m_perfv.panel_size = 16;
    463       m_perfv.relax = 1; 
    464       m_perfv.maxsuper = 128; 
    465       m_perfv.rowblk = 16; 
    466       m_perfv.colblk = 8; 
    467       m_perfv.fillfactor = 20;  
    468     }
    469       
    470     // Variables 
    471     mutable ComputationInfo m_info;
    472     bool m_factorizationIsOk;
    473     bool m_analysisIsOk;
    474     std::string m_lastError;
    475     NCMatrix m_mat; // The input (permuted ) matrix 
    476     SCMatrix m_Lstore; // The lower triangular matrix (supernodal)
    477     MappedSparseMatrix<Scalar,ColMajor,StorageIndex> m_Ustore; // The upper triangular matrix
    478     PermutationType m_perm_c; // Column permutation 
    479     PermutationType m_perm_r ; // Row permutation
    480     IndexVector m_etree; // Column elimination tree 
    481     
    482     typename Base::GlobalLU_t m_glu; 
    483                                
    484     // SparseLU options 
    485     bool m_symmetricmode;
    486     // values for performance 
    487     internal::perfvalues m_perfv;
    488     RealScalar m_diagpivotthresh; // Specifies the threshold used for a diagonal entry to be an acceptable pivot
    489     Index m_nnzL, m_nnzU; // Nonzeros in L and U factors
    490     Index m_detPermR, m_detPermC; // Determinants of the permutation matrices
    491   private:
    492     // Disable copy constructor 
    493     SparseLU (const SparseLU& );
    494 }; // End class SparseLU
    495 
    496 
    497 
    498 // Functions needed by the anaysis phase
    499 /** 
    500   * Compute the column permutation to minimize the fill-in
    501   * 
    502   *  - Apply this permutation to the input matrix - 
    503   * 
    504   *  - Compute the column elimination tree on the permuted matrix 
    505   * 
    506   *  - Postorder the elimination tree and the column permutation
    507   * 
    508   */
    509 template <typename MatrixType, typename OrderingType>
    510 void SparseLU<MatrixType, OrderingType>::analyzePattern(const MatrixType& mat)
    511 {
    512   
    513   //TODO  It is possible as in SuperLU to compute row and columns scaling vectors to equilibrate the matrix mat.
    514   
    515   // Firstly, copy the whole input matrix. 
    516   m_mat = mat;
    517   
    518   // Compute fill-in ordering
    519   OrderingType ord; 
    520   ord(m_mat,m_perm_c);
    521   
    522   // Apply the permutation to the column of the input  matrix
    523   if (m_perm_c.size())
    524   {
    525     m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers. FIXME : This vector is filled but not subsequently used.  
    526     // Then, permute only the column pointers
    527     ei_declare_aligned_stack_constructed_variable(StorageIndex,outerIndexPtr,mat.cols()+1,mat.isCompressed()?const_cast<StorageIndex*>(mat.outerIndexPtr()):0);
    528     
    529     // If the input matrix 'mat' is uncompressed, then the outer-indices do not match the ones of m_mat, and a copy is thus needed.
    530     if(!mat.isCompressed()) 
    531       IndexVector::Map(outerIndexPtr, mat.cols()+1) = IndexVector::Map(m_mat.outerIndexPtr(),mat.cols()+1);
    532     
    533     // Apply the permutation and compute the nnz per column.
    534     for (Index i = 0; i < mat.cols(); i++)
    535     {
    536       m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i];
    537       m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i+1] - outerIndexPtr[i];
    538     }
    539   }
    540   
    541   // Compute the column elimination tree of the permuted matrix 
    542   IndexVector firstRowElt;
    543   internal::coletree(m_mat, m_etree,firstRowElt); 
    544      
    545   // In symmetric mode, do not do postorder here
    546   if (!m_symmetricmode) {
    547     IndexVector post, iwork; 
    548     // Post order etree
    549     internal::treePostorder(StorageIndex(m_mat.cols()), m_etree, post); 
    550       
    551    
    552     // Renumber etree in postorder 
    553     Index m = m_mat.cols(); 
    554     iwork.resize(m+1);
    555     for (Index i = 0; i < m; ++i) iwork(post(i)) = post(m_etree(i));
    556     m_etree = iwork;
    557     
    558     // Postmultiply A*Pc by post, i.e reorder the matrix according to the postorder of the etree
    559     PermutationType post_perm(m); 
    560     for (Index i = 0; i < m; i++) 
    561       post_perm.indices()(i) = post(i); 
    562         
    563     // Combine the two permutations : postorder the permutation for future use
    564     if(m_perm_c.size()) {
    565       m_perm_c = post_perm * m_perm_c;
    566     }
    567     
    568   } // end postordering 
    569   
    570   m_analysisIsOk = true; 
    571 }
    572 
    573 // Functions needed by the numerical factorization phase
    574 
    575 
    576 /** 
    577   *  - Numerical factorization 
    578   *  - Interleaved with the symbolic factorization 
    579   * On exit,  info is 
    580   * 
    581   *    = 0: successful factorization
    582   * 
    583   *    > 0: if info = i, and i is
    584   * 
    585   *       <= A->ncol: U(i,i) is exactly zero. The factorization has
    586   *          been completed, but the factor U is exactly singular,
    587   *          and division by zero will occur if it is used to solve a
    588   *          system of equations.
    589   * 
    590   *       > A->ncol: number of bytes allocated when memory allocation
    591   *         failure occurred, plus A->ncol. If lwork = -1, it is
    592   *         the estimated amount of space needed, plus A->ncol.  
    593   */
    594 template <typename MatrixType, typename OrderingType>
    595 void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
    596 {
    597   using internal::emptyIdxLU;
    598   eigen_assert(m_analysisIsOk && "analyzePattern() should be called first"); 
    599   eigen_assert((matrix.rows() == matrix.cols()) && "Only for squared matrices");
    600   
    601   m_isInitialized = true;
    602   
    603   // Apply the column permutation computed in analyzepattern()
    604   //   m_mat = matrix * m_perm_c.inverse(); 
    605   m_mat = matrix;
    606   if (m_perm_c.size()) 
    607   {
    608     m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers.
    609     //Then, permute only the column pointers
    610     const StorageIndex * outerIndexPtr;
    611     if (matrix.isCompressed()) outerIndexPtr = matrix.outerIndexPtr();
    612     else
    613     {
    614       StorageIndex* outerIndexPtr_t = new StorageIndex[matrix.cols()+1];
    615       for(Index i = 0; i <= matrix.cols(); i++) outerIndexPtr_t[i] = m_mat.outerIndexPtr()[i];
    616       outerIndexPtr = outerIndexPtr_t;
    617     }
    618     for (Index i = 0; i < matrix.cols(); i++)
    619     {
    620       m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i];
    621       m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i+1] - outerIndexPtr[i];
    622     }
    623     if(!matrix.isCompressed()) delete[] outerIndexPtr;
    624   } 
    625   else 
    626   { //FIXME This should not be needed if the empty permutation is handled transparently
    627     m_perm_c.resize(matrix.cols());
    628     for(StorageIndex i = 0; i < matrix.cols(); ++i) m_perm_c.indices()(i) = i;
    629   }
    630   
    631   Index m = m_mat.rows();
    632   Index n = m_mat.cols();
    633   Index nnz = m_mat.nonZeros();
    634   Index maxpanel = m_perfv.panel_size * m;
    635   // Allocate working storage common to the factor routines
    636   Index lwork = 0;
    637   Index info = Base::memInit(m, n, nnz, lwork, m_perfv.fillfactor, m_perfv.panel_size, m_glu); 
    638   if (info) 
    639   {
    640     m_lastError = "UNABLE TO ALLOCATE WORKING MEMORY\n\n" ;
    641     m_factorizationIsOk = false;
    642     return ; 
    643   }
    644   
    645   // Set up pointers for integer working arrays 
    646   IndexVector segrep(m); segrep.setZero();
    647   IndexVector parent(m); parent.setZero();
    648   IndexVector xplore(m); xplore.setZero();
    649   IndexVector repfnz(maxpanel);
    650   IndexVector panel_lsub(maxpanel);
    651   IndexVector xprune(n); xprune.setZero();
    652   IndexVector marker(m*internal::LUNoMarker); marker.setZero();
    653   
    654   repfnz.setConstant(-1); 
    655   panel_lsub.setConstant(-1);
    656   
    657   // Set up pointers for scalar working arrays 
    658   ScalarVector dense; 
    659   dense.setZero(maxpanel);
    660   ScalarVector tempv; 
    661   tempv.setZero(internal::LUnumTempV(m, m_perfv.panel_size, m_perfv.maxsuper, /*m_perfv.rowblk*/m) );
    662   
    663   // Compute the inverse of perm_c
    664   PermutationType iperm_c(m_perm_c.inverse()); 
    665   
    666   // Identify initial relaxed snodes
    667   IndexVector relax_end(n);
    668   if ( m_symmetricmode == true ) 
    669     Base::heap_relax_snode(n, m_etree, m_perfv.relax, marker, relax_end);
    670   else
    671     Base::relax_snode(n, m_etree, m_perfv.relax, marker, relax_end);
    672   
    673   
    674   m_perm_r.resize(m); 
    675   m_perm_r.indices().setConstant(-1);
    676   marker.setConstant(-1);
    677   m_detPermR = 1; // Record the determinant of the row permutation
    678   
    679   m_glu.supno(0) = emptyIdxLU; m_glu.xsup.setConstant(0);
    680   m_glu.xsup(0) = m_glu.xlsub(0) = m_glu.xusub(0) = m_glu.xlusup(0) = Index(0);
    681   
    682   // Work on one 'panel' at a time. A panel is one of the following :
    683   //  (a) a relaxed supernode at the bottom of the etree, or
    684   //  (b) panel_size contiguous columns, <panel_size> defined by the user
    685   Index jcol; 
    686   Index pivrow; // Pivotal row number in the original row matrix
    687   Index nseg1; // Number of segments in U-column above panel row jcol
    688   Index nseg; // Number of segments in each U-column 
    689   Index irep; 
    690   Index i, k, jj; 
    691   for (jcol = 0; jcol < n; )
    692   {
    693     // Adjust panel size so that a panel won't overlap with the next relaxed snode. 
    694     Index panel_size = m_perfv.panel_size; // upper bound on panel width
    695     for (k = jcol + 1; k < (std::min)(jcol+panel_size, n); k++)
    696     {
    697       if (relax_end(k) != emptyIdxLU) 
    698       {
    699         panel_size = k - jcol; 
    700         break; 
    701       }
    702     }
    703     if (k == n) 
    704       panel_size = n - jcol; 
    705       
    706     // Symbolic outer factorization on a panel of columns 
    707     Base::panel_dfs(m, panel_size, jcol, m_mat, m_perm_r.indices(), nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu); 
    708     
    709     // Numeric sup-panel updates in topological order 
    710     Base::panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_glu); 
    711     
    712     // Sparse LU within the panel, and below the panel diagonal 
    713     for ( jj = jcol; jj< jcol + panel_size; jj++) 
    714     {
    715       k = (jj - jcol) * m; // Column index for w-wide arrays 
    716       
    717       nseg = nseg1; // begin after all the panel segments
    718       //Depth-first-search for the current column
    719       VectorBlock<IndexVector> panel_lsubk(panel_lsub, k, m);
    720       VectorBlock<IndexVector> repfnz_k(repfnz, k, m); 
    721       info = Base::column_dfs(m, jj, m_perm_r.indices(), m_perfv.maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu); 
    722       if ( info ) 
    723       {
    724         m_lastError =  "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() ";
    725         m_info = NumericalIssue; 
    726         m_factorizationIsOk = false; 
    727         return; 
    728       }
    729       // Numeric updates to this column 
    730       VectorBlock<ScalarVector> dense_k(dense, k, m); 
    731       VectorBlock<IndexVector> segrep_k(segrep, nseg1, m-nseg1); 
    732       info = Base::column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu); 
    733       if ( info ) 
    734       {
    735         m_lastError = "UNABLE TO EXPAND MEMORY IN COLUMN_BMOD() ";
    736         m_info = NumericalIssue; 
    737         m_factorizationIsOk = false; 
    738         return; 
    739       }
    740       
    741       // Copy the U-segments to ucol(*)
    742       info = Base::copy_to_ucol(jj, nseg, segrep, repfnz_k ,m_perm_r.indices(), dense_k, m_glu); 
    743       if ( info ) 
    744       {
    745         m_lastError = "UNABLE TO EXPAND MEMORY IN COPY_TO_UCOL() ";
    746         m_info = NumericalIssue; 
    747         m_factorizationIsOk = false; 
    748         return; 
    749       }
    750       
    751       // Form the L-segment 
    752       info = Base::pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu);
    753       if ( info ) 
    754       {
    755         m_lastError = "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT ";
    756         std::ostringstream returnInfo;
    757         returnInfo << info; 
    758         m_lastError += returnInfo.str();
    759         m_info = NumericalIssue; 
    760         m_factorizationIsOk = false; 
    761         return; 
    762       }
    763       
    764       // Update the determinant of the row permutation matrix
    765       // FIXME: the following test is not correct, we should probably take iperm_c into account and pivrow is not directly the row pivot.
    766       if (pivrow != jj) m_detPermR = -m_detPermR;
    767 
    768       // Prune columns (0:jj-1) using column jj
    769       Base::pruneL(jj, m_perm_r.indices(), pivrow, nseg, segrep, repfnz_k, xprune, m_glu); 
    770       
    771       // Reset repfnz for this column 
    772       for (i = 0; i < nseg; i++)
    773       {
    774         irep = segrep(i); 
    775         repfnz_k(irep) = emptyIdxLU; 
    776       }
    777     } // end SparseLU within the panel  
    778     jcol += panel_size;  // Move to the next panel
    779   } // end for -- end elimination 
    780   
    781   m_detPermR = m_perm_r.determinant();
    782   m_detPermC = m_perm_c.determinant();
    783   
    784   // Count the number of nonzeros in factors 
    785   Base::countnz(n, m_nnzL, m_nnzU, m_glu); 
    786   // Apply permutation  to the L subscripts 
    787   Base::fixupL(n, m_perm_r.indices(), m_glu);
    788   
    789   // Create supernode matrix L 
    790   m_Lstore.setInfos(m, n, m_glu.lusup, m_glu.xlusup, m_glu.lsub, m_glu.xlsub, m_glu.supno, m_glu.xsup); 
    791   // Create the column major upper sparse matrix  U; 
    792   new (&m_Ustore) MappedSparseMatrix<Scalar, ColMajor, StorageIndex> ( m, n, m_nnzU, m_glu.xusub.data(), m_glu.usub.data(), m_glu.ucol.data() );
    793   
    794   m_info = Success;
    795   m_factorizationIsOk = true;
    796 }
    797 
    798 template<typename MappedSupernodalType>
    799 struct SparseLUMatrixLReturnType : internal::no_assignment_operator
    800 {
    801   typedef typename MappedSupernodalType::Scalar Scalar;
    802   explicit SparseLUMatrixLReturnType(const MappedSupernodalType& mapL) : m_mapL(mapL)
    803   { }
    804   Index rows() const { return m_mapL.rows(); }
    805   Index cols() const { return m_mapL.cols(); }
    806   template<typename Dest>
    807   void solveInPlace( MatrixBase<Dest> &X) const
    808   {
    809     m_mapL.solveInPlace(X);
    810   }
    811   template<bool Conjugate, typename Dest>
    812   void solveTransposedInPlace( MatrixBase<Dest> &X) const
    813   {
    814     m_mapL.template solveTransposedInPlace<Conjugate>(X);
    815   }
    816 
    817   const MappedSupernodalType& m_mapL;
    818 };
    819 
    820 template<typename MatrixLType, typename MatrixUType>
    821 struct SparseLUMatrixUReturnType : internal::no_assignment_operator
    822 {
    823   typedef typename MatrixLType::Scalar Scalar;
    824   SparseLUMatrixUReturnType(const MatrixLType& mapL, const MatrixUType& mapU)
    825   : m_mapL(mapL),m_mapU(mapU)
    826   { }
    827   Index rows() const { return m_mapL.rows(); }
    828   Index cols() const { return m_mapL.cols(); }
    829 
    830   template<typename Dest>   void solveInPlace(MatrixBase<Dest> &X) const
    831   {
    832     Index nrhs = X.cols();
    833     Index n    = X.rows();
    834     // Backward solve with U
    835     for (Index k = m_mapL.nsuper(); k >= 0; k--)
    836     {
    837       Index fsupc = m_mapL.supToCol()[k];
    838       Index lda = m_mapL.colIndexPtr()[fsupc+1] - m_mapL.colIndexPtr()[fsupc]; // leading dimension
    839       Index nsupc = m_mapL.supToCol()[k+1] - fsupc;
    840       Index luptr = m_mapL.colIndexPtr()[fsupc];
    841 
    842       if (nsupc == 1)
    843       {
    844         for (Index j = 0; j < nrhs; j++)
    845         {
    846           X(fsupc, j) /= m_mapL.valuePtr()[luptr];
    847         }
    848       }
    849       else
    850       {
    851         // FIXME: the following lines should use Block expressions and not Map!
    852         Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > A( &(m_mapL.valuePtr()[luptr]), nsupc, nsupc, OuterStride<>(lda) );
    853         Map< Matrix<Scalar,Dynamic,Dest::ColsAtCompileTime, ColMajor>, 0, OuterStride<> > U (&(X.coeffRef(fsupc,0)), nsupc, nrhs, OuterStride<>(n) );
    854         U = A.template triangularView<Upper>().solve(U);
    855       }
    856 
    857       for (Index j = 0; j < nrhs; ++j)
    858       {
    859         for (Index jcol = fsupc; jcol < fsupc + nsupc; jcol++)
    860         {
    861           typename MatrixUType::InnerIterator it(m_mapU, jcol);
    862           for ( ; it; ++it)
    863           {
    864             Index irow = it.index();
    865             X(irow, j) -= X(jcol, j) * it.value();
    866           }
    867         }
    868       }
    869     } // End For U-solve
    870   }
    871 
    872   template<bool Conjugate, typename Dest>   void solveTransposedInPlace(MatrixBase<Dest> &X) const
    873   {
    874     using numext::conj;
    875     Index nrhs = X.cols();
    876     Index n    = X.rows();
    877     // Forward solve with U
    878     for (Index k = 0; k <=  m_mapL.nsuper(); k++)
    879     {
    880       Index fsupc = m_mapL.supToCol()[k];
    881       Index lda = m_mapL.colIndexPtr()[fsupc+1] - m_mapL.colIndexPtr()[fsupc]; // leading dimension
    882       Index nsupc = m_mapL.supToCol()[k+1] - fsupc;
    883       Index luptr = m_mapL.colIndexPtr()[fsupc];
    884 
    885       for (Index j = 0; j < nrhs; ++j)
    886       {
    887         for (Index jcol = fsupc; jcol < fsupc + nsupc; jcol++)
    888         {
    889           typename MatrixUType::InnerIterator it(m_mapU, jcol);
    890           for ( ; it; ++it)
    891           {
    892             Index irow = it.index();
    893             X(jcol, j) -= X(irow, j) * (Conjugate? conj(it.value()): it.value());
    894           }
    895         }
    896       }
    897       if (nsupc == 1)
    898       {
    899         for (Index j = 0; j < nrhs; j++)
    900         {
    901           X(fsupc, j) /= (Conjugate? conj(m_mapL.valuePtr()[luptr]) : m_mapL.valuePtr()[luptr]);
    902         }
    903       }
    904       else
    905       {
    906         Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > A( &(m_mapL.valuePtr()[luptr]), nsupc, nsupc, OuterStride<>(lda) );
    907         Map< Matrix<Scalar,Dynamic,Dest::ColsAtCompileTime, ColMajor>, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) );
    908         if(Conjugate)
    909           U = A.adjoint().template triangularView<Lower>().solve(U);
    910         else
    911           U = A.transpose().template triangularView<Lower>().solve(U);
    912       }
    913     }// End For U-solve
    914   }
    915 
    916 
    917   const MatrixLType& m_mapL;
    918   const MatrixUType& m_mapU;
    919 };
    920 
    921 } // End namespace Eigen 
    922 
    923 #endif