|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! @file dgssv.c
|
|
kusano |
7d535a |
* \brief Solves the system of linear equations A*X=B
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* -- SuperLU routine (version 3.0) --
|
|
kusano |
7d535a |
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
|
|
kusano |
7d535a |
* and Lawrence Berkeley National Lab.
|
|
kusano |
7d535a |
* October 15, 2003
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
#include "slu_ddefs.h"
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! \brief
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* Purpose
|
|
kusano |
7d535a |
* =======
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* DGSSV solves the system of linear equations A*X=B, using the
|
|
kusano |
7d535a |
* LU factorization from DGSTRF. It performs the following steps:
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* 1. If A is stored column-wise (A->Stype = SLU_NC):
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* 1.1. Permute the columns of A, forming A*Pc, where Pc
|
|
kusano |
7d535a |
* is a permutation matrix. For more details of this step,
|
|
kusano |
7d535a |
* see sp_preorder.c.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* 1.2. Factor A as Pr*A*Pc=L*U with the permutation Pr determined
|
|
kusano |
7d535a |
* by Gaussian elimination with partial pivoting.
|
|
kusano |
7d535a |
* L is unit lower triangular with offdiagonal entries
|
|
kusano |
7d535a |
* bounded by 1 in magnitude, and U is upper triangular.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* 1.3. Solve the system of equations A*X=B using the factored
|
|
kusano |
7d535a |
* form of A.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* 2. If A is stored row-wise (A->Stype = SLU_NR), apply the
|
|
kusano |
7d535a |
* above algorithm to the transpose of A:
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* 2.1. Permute columns of transpose(A) (rows of A),
|
|
kusano |
7d535a |
* forming transpose(A)*Pc, where Pc is a permutation matrix.
|
|
kusano |
7d535a |
* For more details of this step, see sp_preorder.c.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* 2.2. Factor A as Pr*transpose(A)*Pc=L*U with the permutation Pr
|
|
kusano |
7d535a |
* determined by Gaussian elimination with partial pivoting.
|
|
kusano |
7d535a |
* L is unit lower triangular with offdiagonal entries
|
|
kusano |
7d535a |
* bounded by 1 in magnitude, and U is upper triangular.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* 2.3. Solve the system of equations A*X=B using the factored
|
|
kusano |
7d535a |
* form of A.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* See supermatrix.h for the definition of 'SuperMatrix' structure.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* Arguments
|
|
kusano |
7d535a |
* =========
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* options (input) superlu_options_t*
|
|
kusano |
7d535a |
* The structure defines the input parameters to control
|
|
kusano |
7d535a |
* how the LU decomposition will be performed and how the
|
|
kusano |
7d535a |
* system will be solved.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* A (input) SuperMatrix*
|
|
kusano |
7d535a |
* Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
|
|
kusano |
7d535a |
* of linear equations is A->nrow. Currently, the type of A can be:
|
|
kusano |
7d535a |
* Stype = SLU_NC or SLU_NR; Dtype = SLU_D; Mtype = SLU_GE.
|
|
kusano |
7d535a |
* In the future, more general A may be handled.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* perm_c (input/output) int*
|
|
kusano |
7d535a |
* If A->Stype = SLU_NC, column permutation vector of size A->ncol
|
|
kusano |
7d535a |
* which defines the permutation matrix Pc; perm_c[i] = j means
|
|
kusano |
7d535a |
* column i of A is in position j in A*Pc.
|
|
kusano |
7d535a |
* If A->Stype = SLU_NR, column permutation vector of size A->nrow
|
|
kusano |
7d535a |
* which describes permutation of columns of transpose(A)
|
|
kusano |
7d535a |
* (rows of A) as described above.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* If options->ColPerm = MY_PERMC or options->Fact = SamePattern or
|
|
kusano |
7d535a |
* options->Fact = SamePattern_SameRowPerm, it is an input argument.
|
|
kusano |
7d535a |
* On exit, perm_c may be overwritten by the product of the input
|
|
kusano |
7d535a |
* perm_c and a permutation that postorders the elimination tree
|
|
kusano |
7d535a |
* of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree
|
|
kusano |
7d535a |
* is already in postorder.
|
|
kusano |
7d535a |
* Otherwise, it is an output argument.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* perm_r (input/output) int*
|
|
kusano |
7d535a |
* If A->Stype = SLU_NC, row permutation vector of size A->nrow,
|
|
kusano |
7d535a |
* which defines the permutation matrix Pr, and is determined
|
|
kusano |
7d535a |
* by partial pivoting. perm_r[i] = j means row i of A is in
|
|
kusano |
7d535a |
* position j in Pr*A.
|
|
kusano |
7d535a |
* If A->Stype = SLU_NR, permutation vector of size A->ncol, which
|
|
kusano |
7d535a |
* determines permutation of rows of transpose(A)
|
|
kusano |
7d535a |
* (columns of A) as described above.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* If options->RowPerm = MY_PERMR or
|
|
kusano |
7d535a |
* options->Fact = SamePattern_SameRowPerm, perm_r is an
|
|
kusano |
7d535a |
* input argument.
|
|
kusano |
7d535a |
* otherwise it is an output argument.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* L (output) SuperMatrix*
|
|
kusano |
7d535a |
* The factor L from the factorization
|
|
kusano |
7d535a |
* Pr*A*Pc=L*U (if A->Stype = SLU_NC) or
|
|
kusano |
7d535a |
* Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR).
|
|
kusano |
7d535a |
* Uses compressed row subscripts storage for supernodes, i.e.,
|
|
kusano |
7d535a |
* L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* U (output) SuperMatrix*
|
|
kusano |
7d535a |
* The factor U from the factorization
|
|
kusano |
7d535a |
* Pr*A*Pc=L*U (if A->Stype = SLU_NC) or
|
|
kusano |
7d535a |
* Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR).
|
|
kusano |
7d535a |
* Uses column-wise storage scheme, i.e., U has types:
|
|
kusano |
7d535a |
* Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_TRU.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* B (input/output) SuperMatrix*
|
|
kusano |
7d535a |
* B has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE.
|
|
kusano |
7d535a |
* On entry, the right hand side matrix.
|
|
kusano |
7d535a |
* On exit, the solution matrix if info = 0;
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* stat (output) SuperLUStat_t*
|
|
kusano |
7d535a |
* Record the statistics on runtime and floating-point operation count.
|
|
kusano |
7d535a |
* See util.h for the definition of 'SuperLUStat_t'.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* info (output) int*
|
|
kusano |
7d535a |
* = 0: successful exit
|
|
kusano |
7d535a |
* > 0: if info = i, and i is
|
|
kusano |
7d535a |
* <= A->ncol: U(i,i) is exactly zero. The factorization has
|
|
kusano |
7d535a |
* been completed, but the factor U is exactly singular,
|
|
kusano |
7d535a |
* so the solution could not be computed.
|
|
kusano |
7d535a |
* > A->ncol: number of bytes allocated when memory allocation
|
|
kusano |
7d535a |
* failure occurred, plus A->ncol.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
void
|
|
kusano |
7d535a |
dgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
|
|
kusano |
7d535a |
SuperMatrix *L, SuperMatrix *U, SuperMatrix *B,
|
|
kusano |
7d535a |
SuperLUStat_t *stat, int *info )
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
DNformat *Bstore;
|
|
kusano |
7d535a |
SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/
|
|
kusano |
7d535a |
SuperMatrix AC; /* Matrix postmultiplied by Pc */
|
|
kusano |
7d535a |
int lwork = 0, *etree, i;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Set default values for some parameters */
|
|
kusano |
7d535a |
int panel_size; /* panel size */
|
|
kusano |
7d535a |
int relax; /* no of columns in a relaxed snodes */
|
|
kusano |
7d535a |
int permc_spec;
|
|
kusano |
7d535a |
trans_t trans = NOTRANS;
|
|
kusano |
7d535a |
double *utime;
|
|
kusano |
7d535a |
double t; /* Temporary time */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Test the input parameters ... */
|
|
kusano |
7d535a |
*info = 0;
|
|
kusano |
7d535a |
Bstore = B->Store;
|
|
kusano |
7d535a |
if ( options->Fact != DOFACT ) *info = -1;
|
|
kusano |
7d535a |
else if ( A->nrow != A->ncol || A->nrow < 0 ||
|
|
kusano |
7d535a |
(A->Stype != SLU_NC && A->Stype != SLU_NR) ||
|
|
kusano |
7d535a |
A->Dtype != SLU_D || A->Mtype != SLU_GE )
|
|
kusano |
7d535a |
*info = -2;
|
|
kusano |
7d535a |
else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
|
|
kusano |
7d535a |
B->Stype != SLU_DN || B->Dtype != SLU_D || B->Mtype != SLU_GE )
|
|
kusano |
7d535a |
*info = -7;
|
|
kusano |
7d535a |
if ( *info != 0 ) {
|
|
kusano |
7d535a |
i = -(*info);
|
|
kusano |
7d535a |
xerbla_("dgssv", &i);
|
|
kusano |
7d535a |
return;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
utime = stat->utime;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Convert A to SLU_NC format when necessary. */
|
|
kusano |
7d535a |
if ( A->Stype == SLU_NR ) {
|
|
kusano |
7d535a |
NRformat *Astore = A->Store;
|
|
kusano |
7d535a |
AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
|
|
kusano |
7d535a |
dCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz,
|
|
kusano |
7d535a |
Astore->nzval, Astore->colind, Astore->rowptr,
|
|
kusano |
7d535a |
SLU_NC, A->Dtype, A->Mtype);
|
|
kusano |
7d535a |
trans = TRANS;
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
if ( A->Stype == SLU_NC ) AA = A;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
t = SuperLU_timer_();
|
|
kusano |
7d535a |
/*
|
|
kusano |
7d535a |
* Get column permutation vector perm_c[], according to permc_spec:
|
|
kusano |
7d535a |
* permc_spec = NATURAL: natural ordering
|
|
kusano |
7d535a |
* permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
|
|
kusano |
7d535a |
* permc_spec = MMD_ATA: minimum degree on structure of A'*A
|
|
kusano |
7d535a |
* permc_spec = COLAMD: approximate minimum degree column ordering
|
|
kusano |
7d535a |
* permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
permc_spec = options->ColPerm;
|
|
kusano |
7d535a |
if ( permc_spec != MY_PERMC && options->Fact == DOFACT )
|
|
kusano |
7d535a |
get_perm_c(permc_spec, AA, perm_c);
|
|
kusano |
7d535a |
utime[COLPERM] = SuperLU_timer_() - t;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
etree = intMalloc(A->ncol);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
t = SuperLU_timer_();
|
|
kusano |
7d535a |
sp_preorder(options, AA, perm_c, etree, &AC);
|
|
kusano |
7d535a |
utime[ETREE] = SuperLU_timer_() - t;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
panel_size = sp_ienv(1);
|
|
kusano |
7d535a |
relax = sp_ienv(2);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n",
|
|
kusano |
7d535a |
relax, panel_size, sp_ienv(3), sp_ienv(4));*/
|
|
kusano |
7d535a |
t = SuperLU_timer_();
|
|
kusano |
7d535a |
/* Compute the LU factorization of A. */
|
|
kusano |
7d535a |
dgstrf(options, &AC, relax, panel_size, etree,
|
|
kusano |
7d535a |
NULL, lwork, perm_c, perm_r, L, U, stat, info);
|
|
kusano |
7d535a |
utime[FACT] = SuperLU_timer_() - t;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
t = SuperLU_timer_();
|
|
kusano |
7d535a |
if ( *info == 0 ) {
|
|
kusano |
7d535a |
/* Solve the system A*X=B, overwriting B with X. */
|
|
kusano |
7d535a |
dgstrs (trans, L, U, perm_c, perm_r, B, stat, info);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
utime[SOLVE] = SuperLU_timer_() - t;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
SUPERLU_FREE (etree);
|
|
kusano |
7d535a |
Destroy_CompCol_Permuted(&AC);
|
|
kusano |
7d535a |
if ( A->Stype == SLU_NR ) {
|
|
kusano |
7d535a |
Destroy_SuperMatrix_Store(AA);
|
|
kusano |
7d535a |
SUPERLU_FREE(AA);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
}
|