|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! @file dgsisx.c
|
|
kusano |
7d535a |
* \brief Computes an approximate solutions of linear equations A*X=B or A'*X=B
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* -- SuperLU routine (version 4.1) --
|
|
kusano |
7d535a |
* Lawrence Berkeley National Laboratory.
|
|
kusano |
7d535a |
* November, 2010
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
#include "slu_ddefs.h"
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! \brief
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* Purpose
|
|
kusano |
7d535a |
* =======
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* DGSISX computes an approximate solutions of linear equations
|
|
kusano |
7d535a |
* A*X=B or A'*X=B, using the ILU factorization from dgsitrf().
|
|
kusano |
7d535a |
* An estimation of the condition number is provided.
|
|
kusano |
7d535a |
* The routine performs the following steps:
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* 1. If A is stored column-wise (A->Stype = SLU_NC):
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* 1.1. If options->Equil = YES or options->RowPerm = LargeDiag, scaling
|
|
kusano |
7d535a |
* factors are computed to equilibrate the system:
|
|
kusano |
7d535a |
* options->Trans = NOTRANS:
|
|
kusano |
7d535a |
* diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
|
|
kusano |
7d535a |
* options->Trans = TRANS:
|
|
kusano |
7d535a |
* (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
|
|
kusano |
7d535a |
* options->Trans = CONJ:
|
|
kusano |
7d535a |
* (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
|
|
kusano |
7d535a |
* Whether or not the system will be equilibrated depends on the
|
|
kusano |
7d535a |
* scaling of the matrix A, but if equilibration is used, A is
|
|
kusano |
7d535a |
* overwritten by diag(R)*A*diag(C) and B by diag(R)*B
|
|
kusano |
7d535a |
* (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans
|
|
kusano |
7d535a |
* = TRANS or CONJ).
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* 1.2. Permute columns of A, forming A*Pc, where Pc is a permutation
|
|
kusano |
7d535a |
* matrix that usually preserves sparsity.
|
|
kusano |
7d535a |
* For more details of this step, see sp_preorder.c.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* 1.3. If options->Fact != FACTORED, the LU decomposition is used to
|
|
kusano |
7d535a |
* factor the matrix A (after equilibration if options->Equil = YES)
|
|
kusano |
7d535a |
* as Pr*A*Pc = L*U, with Pr determined by partial pivoting.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* 1.4. Compute the reciprocal pivot growth factor.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* 1.5. If some U(i,i) = 0, so that U is exactly singular, then the
|
|
kusano |
7d535a |
* routine fills a small number on the diagonal entry, that is
|
|
kusano |
7d535a |
* U(i,i) = ||A(:,i)||_oo * options->ILU_FillTol ** (1 - i / n),
|
|
kusano |
7d535a |
* and info will be increased by 1. The factored form of A is used
|
|
kusano |
7d535a |
* to estimate the condition number of the preconditioner. If the
|
|
kusano |
7d535a |
* reciprocal of the condition number is less than machine precision,
|
|
kusano |
7d535a |
* info = A->ncol+1 is returned as a warning, but the routine still
|
|
kusano |
7d535a |
* goes on to solve for X.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* 1.6. The system of equations is solved for X using the factored form
|
|
kusano |
7d535a |
* of A.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* 1.7. options->IterRefine is not used
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* 1.8. If equilibration was used, the matrix X is premultiplied by
|
|
kusano |
7d535a |
* diag(C) (if options->Trans = NOTRANS) or diag(R)
|
|
kusano |
7d535a |
* (if options->Trans = TRANS or CONJ) so that it solves the
|
|
kusano |
7d535a |
* original system before equilibration.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* 1.9. options for ILU only
|
|
kusano |
7d535a |
* 1) If options->RowPerm = LargeDiag, MC64 is used to scale and
|
|
kusano |
7d535a |
* permute the matrix to an I-matrix, that is Pr*Dr*A*Dc has
|
|
kusano |
7d535a |
* entries of modulus 1 on the diagonal and off-diagonal entries
|
|
kusano |
7d535a |
* of modulus at most 1. If MC64 fails, dgsequ() is used to
|
|
kusano |
7d535a |
* equilibrate the system.
|
|
kusano |
7d535a |
* ( Default: LargeDiag )
|
|
kusano |
7d535a |
* 2) options->ILU_DropTol = tau is the threshold for dropping.
|
|
kusano |
7d535a |
* For L, it is used directly (for the whole row in a supernode);
|
|
kusano |
7d535a |
* For U, ||A(:,i)||_oo * tau is used as the threshold
|
|
kusano |
7d535a |
* for the i-th column.
|
|
kusano |
7d535a |
* If a secondary dropping rule is required, tau will
|
|
kusano |
7d535a |
* also be used to compute the second threshold.
|
|
kusano |
7d535a |
* ( Default: 1e-4 )
|
|
kusano |
7d535a |
* 3) options->ILU_FillFactor = gamma, used as the initial guess
|
|
kusano |
7d535a |
* of memory growth.
|
|
kusano |
7d535a |
* If a secondary dropping rule is required, it will also
|
|
kusano |
7d535a |
* be used as an upper bound of the memory.
|
|
kusano |
7d535a |
* ( Default: 10 )
|
|
kusano |
7d535a |
* 4) options->ILU_DropRule specifies the dropping rule.
|
|
kusano |
7d535a |
* Option Meaning
|
|
kusano |
7d535a |
* ====== ===========
|
|
kusano |
7d535a |
* DROP_BASIC: Basic dropping rule, supernodal based ILUTP(tau).
|
|
kusano |
7d535a |
* DROP_PROWS: Supernodal based ILUTP(p,tau), p = gamma*nnz(A)/n.
|
|
kusano |
7d535a |
* DROP_COLUMN: Variant of ILUTP(p,tau), for j-th column,
|
|
kusano |
7d535a |
* p = gamma * nnz(A(:,j)).
|
|
kusano |
7d535a |
* DROP_AREA: Variation of ILUTP, for j-th column, use
|
|
kusano |
7d535a |
* nnz(F(:,1:j)) / nnz(A(:,1:j)) to control memory.
|
|
kusano |
7d535a |
* DROP_DYNAMIC: Modify the threshold tau during factorizaion:
|
|
kusano |
7d535a |
* If nnz(L(:,1:j)) / nnz(A(:,1:j)) > gamma
|
|
kusano |
7d535a |
* tau_L(j) := MIN(tau_0, tau_L(j-1) * 2);
|
|
kusano |
7d535a |
* Otherwise
|
|
kusano |
7d535a |
* tau_L(j) := MAX(tau_0, tau_L(j-1) / 2);
|
|
kusano |
7d535a |
* tau_U(j) uses the similar rule.
|
|
kusano |
7d535a |
* NOTE: the thresholds used by L and U are separate.
|
|
kusano |
7d535a |
* DROP_INTERP: Compute the second dropping threshold by
|
|
kusano |
7d535a |
* interpolation instead of sorting (default).
|
|
kusano |
7d535a |
* In this case, the actual fill ratio is not
|
|
kusano |
7d535a |
* guaranteed smaller than gamma.
|
|
kusano |
7d535a |
* DROP_PROWS, DROP_COLUMN and DROP_AREA are mutually exclusive.
|
|
kusano |
7d535a |
* ( Default: DROP_BASIC | DROP_AREA )
|
|
kusano |
7d535a |
* 5) options->ILU_Norm is the criterion of measuring the magnitude
|
|
kusano |
7d535a |
* of a row in a supernode of L. ( Default is INF_NORM )
|
|
kusano |
7d535a |
* options->ILU_Norm RowSize(x[1:n])
|
|
kusano |
7d535a |
* ================= ===============
|
|
kusano |
7d535a |
* ONE_NORM ||x||_1 / n
|
|
kusano |
7d535a |
* TWO_NORM ||x||_2 / sqrt(n)
|
|
kusano |
7d535a |
* INF_NORM max{|x[i]|}
|
|
kusano |
7d535a |
* 6) options->ILU_MILU specifies the type of MILU's variation.
|
|
kusano |
7d535a |
* = SILU: do not perform Modified ILU;
|
|
kusano |
7d535a |
* = SMILU_1 (not recommended):
|
|
kusano |
7d535a |
* U(i,i) := U(i,i) + sum(dropped entries);
|
|
kusano |
7d535a |
* = SMILU_2:
|
|
kusano |
7d535a |
* U(i,i) := U(i,i) + SGN(U(i,i)) * sum(dropped entries);
|
|
kusano |
7d535a |
* = SMILU_3:
|
|
kusano |
7d535a |
* U(i,i) := U(i,i) + SGN(U(i,i)) * sum(|dropped entries|);
|
|
kusano |
7d535a |
* NOTE: Even SMILU_1 does not preserve the column sum because of
|
|
kusano |
7d535a |
* late dropping.
|
|
kusano |
7d535a |
* ( Default: SILU )
|
|
kusano |
7d535a |
* 7) options->ILU_FillTol is used as the perturbation when
|
|
kusano |
7d535a |
* encountering zero pivots. If some U(i,i) = 0, so that U is
|
|
kusano |
7d535a |
* exactly singular, then
|
|
kusano |
7d535a |
* U(i,i) := ||A(:,i)|| * options->ILU_FillTol ** (1 - i / n).
|
|
kusano |
7d535a |
* ( Default: 1e-2 )
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* 2. If A is stored row-wise (A->Stype = SLU_NR), apply the above algorithm
|
|
kusano |
7d535a |
* to the transpose of A:
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* 2.1. If options->Equil = YES or options->RowPerm = LargeDiag, scaling
|
|
kusano |
7d535a |
* factors are computed to equilibrate the system:
|
|
kusano |
7d535a |
* options->Trans = NOTRANS:
|
|
kusano |
7d535a |
* diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
|
|
kusano |
7d535a |
* options->Trans = TRANS:
|
|
kusano |
7d535a |
* (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
|
|
kusano |
7d535a |
* options->Trans = CONJ:
|
|
kusano |
7d535a |
* (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
|
|
kusano |
7d535a |
* Whether or not the system will be equilibrated depends on the
|
|
kusano |
7d535a |
* scaling of the matrix A, but if equilibration is used, A' is
|
|
kusano |
7d535a |
* overwritten by diag(R)*A'*diag(C) and B by diag(R)*B
|
|
kusano |
7d535a |
* (if trans='N') or diag(C)*B (if trans = 'T' or 'C').
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* 2.2. Permute columns of transpose(A) (rows of A),
|
|
kusano |
7d535a |
* forming transpose(A)*Pc, where Pc is a permutation matrix that
|
|
kusano |
7d535a |
* usually preserves sparsity.
|
|
kusano |
7d535a |
* For more details of this step, see sp_preorder.c.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* 2.3. If options->Fact != FACTORED, the LU decomposition is used to
|
|
kusano |
7d535a |
* factor the transpose(A) (after equilibration if
|
|
kusano |
7d535a |
* options->Fact = YES) as Pr*transpose(A)*Pc = L*U with the
|
|
kusano |
7d535a |
* permutation Pr determined by partial pivoting.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* 2.4. Compute the reciprocal pivot growth factor.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* 2.5. If some U(i,i) = 0, so that U is exactly singular, then the
|
|
kusano |
7d535a |
* routine fills a small number on the diagonal entry, that is
|
|
kusano |
7d535a |
* U(i,i) = ||A(:,i)||_oo * options->ILU_FillTol ** (1 - i / n).
|
|
kusano |
7d535a |
* And info will be increased by 1. The factored form of A is used
|
|
kusano |
7d535a |
* to estimate the condition number of the preconditioner. If the
|
|
kusano |
7d535a |
* reciprocal of the condition number is less than machine precision,
|
|
kusano |
7d535a |
* info = A->ncol+1 is returned as a warning, but the routine still
|
|
kusano |
7d535a |
* goes on to solve for X.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* 2.6. The system of equations is solved for X using the factored form
|
|
kusano |
7d535a |
* of transpose(A).
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* 2.7. If options->IterRefine is not used.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* 2.8. If equilibration was used, the matrix X is premultiplied by
|
|
kusano |
7d535a |
* diag(C) (if options->Trans = NOTRANS) or diag(R)
|
|
kusano |
7d535a |
* (if options->Trans = TRANS or CONJ) so that it solves the
|
|
kusano |
7d535a |
* original system before equilibration.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* See supermatrix.h for the definition of 'SuperMatrix' structure.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* Arguments
|
|
kusano |
7d535a |
* =========
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* options (input) superlu_options_t*
|
|
kusano |
7d535a |
* The structure defines the input parameters to control
|
|
kusano |
7d535a |
* how the LU decomposition will be performed and how the
|
|
kusano |
7d535a |
* system will be solved.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* A (input/output) SuperMatrix*
|
|
kusano |
7d535a |
* Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
|
|
kusano |
7d535a |
* of the linear equations is A->nrow. Currently, the type of A can be:
|
|
kusano |
7d535a |
* Stype = SLU_NC or SLU_NR, Dtype = SLU_D, Mtype = SLU_GE.
|
|
kusano |
7d535a |
* In the future, more general A may be handled.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* On entry, If options->Fact = FACTORED and equed is not 'N',
|
|
kusano |
7d535a |
* then A must have been equilibrated by the scaling factors in
|
|
kusano |
7d535a |
* R and/or C.
|
|
kusano |
7d535a |
* On exit, A is not modified
|
|
kusano |
7d535a |
* if options->Equil = NO, or
|
|
kusano |
7d535a |
* if options->Equil = YES but equed = 'N' on exit, or
|
|
kusano |
7d535a |
* if options->RowPerm = NO.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* Otherwise, if options->Equil = YES and equed is not 'N',
|
|
kusano |
7d535a |
* A is scaled as follows:
|
|
kusano |
7d535a |
* If A->Stype = SLU_NC:
|
|
kusano |
7d535a |
* equed = 'R': A := diag(R) * A
|
|
kusano |
7d535a |
* equed = 'C': A := A * diag(C)
|
|
kusano |
7d535a |
* equed = 'B': A := diag(R) * A * diag(C).
|
|
kusano |
7d535a |
* If A->Stype = SLU_NR:
|
|
kusano |
7d535a |
* equed = 'R': transpose(A) := diag(R) * transpose(A)
|
|
kusano |
7d535a |
* equed = 'C': transpose(A) := transpose(A) * diag(C)
|
|
kusano |
7d535a |
* equed = 'B': transpose(A) := diag(R) * transpose(A) * diag(C).
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* If options->RowPerm = LargeDiag, MC64 is used to scale and permute
|
|
kusano |
7d535a |
* the matrix to an I-matrix, that is A is modified as follows:
|
|
kusano |
7d535a |
* P*Dr*A*Dc has entries of modulus 1 on the diagonal and
|
|
kusano |
7d535a |
* off-diagonal entries of modulus at most 1. P is a permutation
|
|
kusano |
7d535a |
* obtained from MC64.
|
|
kusano |
7d535a |
* If MC64 fails, dgsequ() is used to equilibrate the system,
|
|
kusano |
7d535a |
* and A is scaled as above, there is no permutation involved.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* perm_c (input/output) int*
|
|
kusano |
7d535a |
* If A->Stype = SLU_NC, Column permutation vector of size A->ncol,
|
|
kusano |
7d535a |
* which defines the permutation matrix Pc; perm_c[i] = j means
|
|
kusano |
7d535a |
* column i of A is in position j in A*Pc.
|
|
kusano |
7d535a |
* On exit, perm_c may be overwritten by the product of the input
|
|
kusano |
7d535a |
* perm_c and a permutation that postorders the elimination tree
|
|
kusano |
7d535a |
* of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree
|
|
kusano |
7d535a |
* is already in postorder.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* If A->Stype = SLU_NR, column permutation vector of size A->nrow,
|
|
kusano |
7d535a |
* which describes permutation of columns of transpose(A)
|
|
kusano |
7d535a |
* (rows of A) as described above.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* perm_r (input/output) int*
|
|
kusano |
7d535a |
* If A->Stype = SLU_NC, row permutation vector of size A->nrow,
|
|
kusano |
7d535a |
* which defines the permutation matrix Pr, and is determined
|
|
kusano |
7d535a |
* by partial pivoting. perm_r[i] = j means row i of A is in
|
|
kusano |
7d535a |
* position j in Pr*A.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* If A->Stype = SLU_NR, permutation vector of size A->ncol, which
|
|
kusano |
7d535a |
* determines permutation of rows of transpose(A)
|
|
kusano |
7d535a |
* (columns of A) as described above.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* If options->Fact = SamePattern_SameRowPerm, the pivoting routine
|
|
kusano |
7d535a |
* will try to use the input perm_r, unless a certain threshold
|
|
kusano |
7d535a |
* criterion is violated. In that case, perm_r is overwritten by a
|
|
kusano |
7d535a |
* new permutation determined by partial pivoting or diagonal
|
|
kusano |
7d535a |
* threshold pivoting.
|
|
kusano |
7d535a |
* Otherwise, perm_r is output argument.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* etree (input/output) int*, dimension (A->ncol)
|
|
kusano |
7d535a |
* Elimination tree of Pc'*A'*A*Pc.
|
|
kusano |
7d535a |
* If options->Fact != FACTORED and options->Fact != DOFACT,
|
|
kusano |
7d535a |
* etree is an input argument, otherwise it is an output argument.
|
|
kusano |
7d535a |
* Note: etree is a vector of parent pointers for a forest whose
|
|
kusano |
7d535a |
* vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* equed (input/output) char*
|
|
kusano |
7d535a |
* Specifies the form of equilibration that was done.
|
|
kusano |
7d535a |
* = 'N': No equilibration.
|
|
kusano |
7d535a |
* = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
|
|
kusano |
7d535a |
* = 'C': Column equilibration, i.e., A was postmultiplied by diag(C).
|
|
kusano |
7d535a |
* = 'B': Both row and column equilibration, i.e., A was replaced
|
|
kusano |
7d535a |
* by diag(R)*A*diag(C).
|
|
kusano |
7d535a |
* If options->Fact = FACTORED, equed is an input argument,
|
|
kusano |
7d535a |
* otherwise it is an output argument.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* R (input/output) double*, dimension (A->nrow)
|
|
kusano |
7d535a |
* The row scale factors for A or transpose(A).
|
|
kusano |
7d535a |
* If equed = 'R' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
|
|
kusano |
7d535a |
* (if A->Stype = SLU_NR) is multiplied on the left by diag(R).
|
|
kusano |
7d535a |
* If equed = 'N' or 'C', R is not accessed.
|
|
kusano |
7d535a |
* If options->Fact = FACTORED, R is an input argument,
|
|
kusano |
7d535a |
* otherwise, R is output.
|
|
kusano |
7d535a |
* If options->zFact = FACTORED and equed = 'R' or 'B', each element
|
|
kusano |
7d535a |
* of R must be positive.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* C (input/output) double*, dimension (A->ncol)
|
|
kusano |
7d535a |
* The column scale factors for A or transpose(A).
|
|
kusano |
7d535a |
* If equed = 'C' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
|
|
kusano |
7d535a |
* (if A->Stype = SLU_NR) is multiplied on the right by diag(C).
|
|
kusano |
7d535a |
* If equed = 'N' or 'R', C is not accessed.
|
|
kusano |
7d535a |
* If options->Fact = FACTORED, C is an input argument,
|
|
kusano |
7d535a |
* otherwise, C is output.
|
|
kusano |
7d535a |
* If options->Fact = FACTORED and equed = 'C' or 'B', each element
|
|
kusano |
7d535a |
* of C must be positive.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* L (output) SuperMatrix*
|
|
kusano |
7d535a |
* The factor L from the factorization
|
|
kusano |
7d535a |
* Pr*A*Pc=L*U (if A->Stype SLU_= NC) or
|
|
kusano |
7d535a |
* Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR).
|
|
kusano |
7d535a |
* Uses compressed row subscripts storage for supernodes, i.e.,
|
|
kusano |
7d535a |
* L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* U (output) SuperMatrix*
|
|
kusano |
7d535a |
* The factor U from the factorization
|
|
kusano |
7d535a |
* Pr*A*Pc=L*U (if A->Stype = SLU_NC) or
|
|
kusano |
7d535a |
* Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR).
|
|
kusano |
7d535a |
* Uses column-wise storage scheme, i.e., U has types:
|
|
kusano |
7d535a |
* Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_TRU.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* work (workspace/output) void*, size (lwork) (in bytes)
|
|
kusano |
7d535a |
* User supplied workspace, should be large enough
|
|
kusano |
7d535a |
* to hold data structures for factors L and U.
|
|
kusano |
7d535a |
* On exit, if fact is not 'F', L and U point to this array.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* lwork (input) int
|
|
kusano |
7d535a |
* Specifies the size of work array in bytes.
|
|
kusano |
7d535a |
* = 0: allocate space internally by system malloc;
|
|
kusano |
7d535a |
* > 0: use user-supplied work array of length lwork in bytes,
|
|
kusano |
7d535a |
* returns error if space runs out.
|
|
kusano |
7d535a |
* = -1: the routine guesses the amount of space needed without
|
|
kusano |
7d535a |
* performing the factorization, and returns it in
|
|
kusano |
7d535a |
* mem_usage->total_needed; no other side effects.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* See argument 'mem_usage' for memory usage statistics.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* B (input/output) SuperMatrix*
|
|
kusano |
7d535a |
* B has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE.
|
|
kusano |
7d535a |
* On entry, the right hand side matrix.
|
|
kusano |
7d535a |
* If B->ncol = 0, only LU decomposition is performed, the triangular
|
|
kusano |
7d535a |
* solve is skipped.
|
|
kusano |
7d535a |
* On exit,
|
|
kusano |
7d535a |
* if equed = 'N', B is not modified; otherwise
|
|
kusano |
7d535a |
* if A->Stype = SLU_NC:
|
|
kusano |
7d535a |
* if options->Trans = NOTRANS and equed = 'R' or 'B',
|
|
kusano |
7d535a |
* B is overwritten by diag(R)*B;
|
|
kusano |
7d535a |
* if options->Trans = TRANS or CONJ and equed = 'C' of 'B',
|
|
kusano |
7d535a |
* B is overwritten by diag(C)*B;
|
|
kusano |
7d535a |
* if A->Stype = SLU_NR:
|
|
kusano |
7d535a |
* if options->Trans = NOTRANS and equed = 'C' or 'B',
|
|
kusano |
7d535a |
* B is overwritten by diag(C)*B;
|
|
kusano |
7d535a |
* if options->Trans = TRANS or CONJ and equed = 'R' of 'B',
|
|
kusano |
7d535a |
* B is overwritten by diag(R)*B.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* If options->RowPerm = LargeDiag, MC64 is used to scale and permute
|
|
kusano |
7d535a |
* the matrix A to an I-matrix. Then, in addition to the scaling
|
|
kusano |
7d535a |
* above, B is further permuted by P*B if options->Trans = NOTRANS,
|
|
kusano |
7d535a |
* where P is obtained from MC64.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* X (output) SuperMatrix*
|
|
kusano |
7d535a |
* X has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE.
|
|
kusano |
7d535a |
* If info = 0 or info = A->ncol+1, X contains the solution matrix
|
|
kusano |
7d535a |
* to the original system of equations. Note that A and B are modified
|
|
kusano |
7d535a |
* on exit if equed is not 'N', and the solution to the equilibrated
|
|
kusano |
7d535a |
* system is inv(diag(C))*X if options->Trans = NOTRANS and
|
|
kusano |
7d535a |
* equed = 'C' or 'B', or inv(diag(R))*X if options->Trans = 'T' or 'C'
|
|
kusano |
7d535a |
* and equed = 'R' or 'B'.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* recip_pivot_growth (output) double*
|
|
kusano |
7d535a |
* The reciprocal pivot growth factor max_j( norm(A_j)/norm(U_j) ).
|
|
kusano |
7d535a |
* The infinity norm is used. If recip_pivot_growth is much less
|
|
kusano |
7d535a |
* than 1, the stability of the LU factorization could be poor.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* rcond (output) double*
|
|
kusano |
7d535a |
* The estimate of the reciprocal condition number of the matrix A
|
|
kusano |
7d535a |
* after equilibration (if done). If rcond is less than the machine
|
|
kusano |
7d535a |
* precision (in particular, if rcond = 0), the matrix is singular
|
|
kusano |
7d535a |
* to working precision. This condition is indicated by a return
|
|
kusano |
7d535a |
* code of info > 0.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* mem_usage (output) mem_usage_t*
|
|
kusano |
7d535a |
* Record the memory usage statistics, consisting of following fields:
|
|
kusano |
7d535a |
* - for_lu (float)
|
|
kusano |
7d535a |
* The amount of space used in bytes for L\U data structures.
|
|
kusano |
7d535a |
* - total_needed (float)
|
|
kusano |
7d535a |
* The amount of space needed in bytes to perform factorization.
|
|
kusano |
7d535a |
* - expansions (int)
|
|
kusano |
7d535a |
* The number of memory expansions during the LU factorization.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* stat (output) SuperLUStat_t*
|
|
kusano |
7d535a |
* Record the statistics on runtime and floating-point operation count.
|
|
kusano |
7d535a |
* See slu_util.h for the definition of 'SuperLUStat_t'.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* info (output) int*
|
|
kusano |
7d535a |
* = 0: successful exit
|
|
kusano |
7d535a |
* < 0: if info = -i, the i-th argument had an illegal value
|
|
kusano |
7d535a |
* > 0: if info = i, and i is
|
|
kusano |
7d535a |
* <= A->ncol: number of zero pivots. They are replaced by small
|
|
kusano |
7d535a |
* entries due to options->ILU_FillTol.
|
|
kusano |
7d535a |
* = A->ncol+1: U is nonsingular, but RCOND is less than machine
|
|
kusano |
7d535a |
* precision, meaning that the matrix is singular to
|
|
kusano |
7d535a |
* working precision. Nevertheless, the solution and
|
|
kusano |
7d535a |
* error bounds are computed because there are a number
|
|
kusano |
7d535a |
* of situations where the computed solution can be more
|
|
kusano |
7d535a |
* accurate than the value of RCOND would suggest.
|
|
kusano |
7d535a |
* > A->ncol+1: number of bytes allocated when memory allocation
|
|
kusano |
7d535a |
* failure occurred, plus A->ncol.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
void
|
|
kusano |
7d535a |
dgsisx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
|
|
kusano |
7d535a |
int *etree, char *equed, double *R, double *C,
|
|
kusano |
7d535a |
SuperMatrix *L, SuperMatrix *U, void *work, int lwork,
|
|
kusano |
7d535a |
SuperMatrix *B, SuperMatrix *X,
|
|
kusano |
7d535a |
double *recip_pivot_growth, double *rcond,
|
|
kusano |
7d535a |
mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
DNformat *Bstore, *Xstore;
|
|
kusano |
7d535a |
double *Bmat, *Xmat;
|
|
kusano |
7d535a |
int ldb, ldx, nrhs;
|
|
kusano |
7d535a |
SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/
|
|
kusano |
7d535a |
SuperMatrix AC; /* Matrix postmultiplied by Pc */
|
|
kusano |
7d535a |
int colequ, equil, nofact, notran, rowequ, permc_spec, mc64;
|
|
kusano |
7d535a |
trans_t trant;
|
|
kusano |
7d535a |
char norm[1];
|
|
kusano |
7d535a |
int i, j, info1;
|
|
kusano |
7d535a |
double amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin;
|
|
kusano |
7d535a |
int relax, panel_size;
|
|
kusano |
7d535a |
double diag_pivot_thresh;
|
|
kusano |
7d535a |
double t0; /* temporary time */
|
|
kusano |
7d535a |
double *utime;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
int *perm = NULL;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* External functions */
|
|
kusano |
7d535a |
extern double dlangs(char *, SuperMatrix *);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
Bstore = B->Store;
|
|
kusano |
7d535a |
Xstore = X->Store;
|
|
kusano |
7d535a |
Bmat = Bstore->nzval;
|
|
kusano |
7d535a |
Xmat = Xstore->nzval;
|
|
kusano |
7d535a |
ldb = Bstore->lda;
|
|
kusano |
7d535a |
ldx = Xstore->lda;
|
|
kusano |
7d535a |
nrhs = B->ncol;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
*info = 0;
|
|
kusano |
7d535a |
nofact = (options->Fact != FACTORED);
|
|
kusano |
7d535a |
equil = (options->Equil == YES);
|
|
kusano |
7d535a |
notran = (options->Trans == NOTRANS);
|
|
kusano |
7d535a |
mc64 = (options->RowPerm == LargeDiag);
|
|
kusano |
7d535a |
if ( nofact ) {
|
|
kusano |
7d535a |
*(unsigned char *)equed = 'N';
|
|
kusano |
7d535a |
rowequ = FALSE;
|
|
kusano |
7d535a |
colequ = FALSE;
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
rowequ = lsame_(equed, "R") || lsame_(equed, "B");
|
|
kusano |
7d535a |
colequ = lsame_(equed, "C") || lsame_(equed, "B");
|
|
kusano |
7d535a |
smlnum = dlamch_("Safe minimum");
|
|
kusano |
7d535a |
bignum = 1. / smlnum;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Test the input parameters */
|
|
kusano |
7d535a |
if (!nofact && options->Fact != DOFACT && options->Fact != SamePattern &&
|
|
kusano |
7d535a |
options->Fact != SamePattern_SameRowPerm &&
|
|
kusano |
7d535a |
!notran && options->Trans != TRANS && options->Trans != CONJ &&
|
|
kusano |
7d535a |
!equil && options->Equil != NO)
|
|
kusano |
7d535a |
*info = -1;
|
|
kusano |
7d535a |
else if ( A->nrow != A->ncol || A->nrow < 0 ||
|
|
kusano |
7d535a |
(A->Stype != SLU_NC && A->Stype != SLU_NR) ||
|
|
kusano |
7d535a |
A->Dtype != SLU_D || A->Mtype != SLU_GE )
|
|
kusano |
7d535a |
*info = -2;
|
|
kusano |
7d535a |
else if (options->Fact == FACTORED &&
|
|
kusano |
7d535a |
!(rowequ || colequ || lsame_(equed, "N")))
|
|
kusano |
7d535a |
*info = -6;
|
|
kusano |
7d535a |
else {
|
|
kusano |
7d535a |
if (rowequ) {
|
|
kusano |
7d535a |
rcmin = bignum;
|
|
kusano |
7d535a |
rcmax = 0.;
|
|
kusano |
7d535a |
for (j = 0; j < A->nrow; ++j) {
|
|
kusano |
7d535a |
rcmin = SUPERLU_MIN(rcmin, R[j]);
|
|
kusano |
7d535a |
rcmax = SUPERLU_MAX(rcmax, R[j]);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
if (rcmin <= 0.) *info = -7;
|
|
kusano |
7d535a |
else if ( A->nrow > 0)
|
|
kusano |
7d535a |
rowcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum);
|
|
kusano |
7d535a |
else rowcnd = 1.;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
if (colequ && *info == 0) {
|
|
kusano |
7d535a |
rcmin = bignum;
|
|
kusano |
7d535a |
rcmax = 0.;
|
|
kusano |
7d535a |
for (j = 0; j < A->nrow; ++j) {
|
|
kusano |
7d535a |
rcmin = SUPERLU_MIN(rcmin, C[j]);
|
|
kusano |
7d535a |
rcmax = SUPERLU_MAX(rcmax, C[j]);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
if (rcmin <= 0.) *info = -8;
|
|
kusano |
7d535a |
else if (A->nrow > 0)
|
|
kusano |
7d535a |
colcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum);
|
|
kusano |
7d535a |
else colcnd = 1.;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
if (*info == 0) {
|
|
kusano |
7d535a |
if ( lwork < -1 ) *info = -12;
|
|
kusano |
7d535a |
else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
|
|
kusano |
7d535a |
B->Stype != SLU_DN || B->Dtype != SLU_D ||
|
|
kusano |
7d535a |
B->Mtype != SLU_GE )
|
|
kusano |
7d535a |
*info = -13;
|
|
kusano |
7d535a |
else if ( X->ncol < 0 || Xstore->lda < SUPERLU_MAX(0, A->nrow) ||
|
|
kusano |
7d535a |
(B->ncol != 0 && B->ncol != X->ncol) ||
|
|
kusano |
7d535a |
X->Stype != SLU_DN ||
|
|
kusano |
7d535a |
X->Dtype != SLU_D || X->Mtype != SLU_GE )
|
|
kusano |
7d535a |
*info = -14;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
if (*info != 0) {
|
|
kusano |
7d535a |
i = -(*info);
|
|
kusano |
7d535a |
xerbla_("dgsisx", &i);
|
|
kusano |
7d535a |
return;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Initialization for factor parameters */
|
|
kusano |
7d535a |
panel_size = sp_ienv(1);
|
|
kusano |
7d535a |
relax = sp_ienv(2);
|
|
kusano |
7d535a |
diag_pivot_thresh = options->DiagPivotThresh;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
utime = stat->utime;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Convert A to SLU_NC format when necessary. */
|
|
kusano |
7d535a |
if ( A->Stype == SLU_NR ) {
|
|
kusano |
7d535a |
NRformat *Astore = A->Store;
|
|
kusano |
7d535a |
AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
|
|
kusano |
7d535a |
dCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz,
|
|
kusano |
7d535a |
Astore->nzval, Astore->colind, Astore->rowptr,
|
|
kusano |
7d535a |
SLU_NC, A->Dtype, A->Mtype);
|
|
kusano |
7d535a |
if ( notran ) { /* Reverse the transpose argument. */
|
|
kusano |
7d535a |
trant = TRANS;
|
|
kusano |
7d535a |
notran = 0;
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
trant = NOTRANS;
|
|
kusano |
7d535a |
notran = 1;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
} else { /* A->Stype == SLU_NC */
|
|
kusano |
7d535a |
trant = options->Trans;
|
|
kusano |
7d535a |
AA = A;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( nofact ) {
|
|
kusano |
7d535a |
register int i, j;
|
|
kusano |
7d535a |
NCformat *Astore = AA->Store;
|
|
kusano |
7d535a |
int nnz = Astore->nnz;
|
|
kusano |
7d535a |
int *colptr = Astore->colptr;
|
|
kusano |
7d535a |
int *rowind = Astore->rowind;
|
|
kusano |
7d535a |
double *nzval = (double *)Astore->nzval;
|
|
kusano |
7d535a |
int n = AA->nrow;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( mc64 ) {
|
|
kusano |
7d535a |
*equed = 'B';
|
|
kusano |
7d535a |
/*rowequ = colequ = 1;*/
|
|
kusano |
7d535a |
t0 = SuperLU_timer_();
|
|
kusano |
7d535a |
if ((perm = intMalloc(n)) == NULL)
|
|
kusano |
7d535a |
ABORT("SUPERLU_MALLOC fails for perm[]");
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
info1 = dldperm(5, n, nnz, colptr, rowind, nzval, perm, R, C);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if (info1 > 0) { /* MC64 fails, call dgsequ() later */
|
|
kusano |
7d535a |
mc64 = 0;
|
|
kusano |
7d535a |
SUPERLU_FREE(perm);
|
|
kusano |
7d535a |
perm = NULL;
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
rowequ = colequ = 1;
|
|
kusano |
7d535a |
for (i = 0; i < n; i++) {
|
|
kusano |
7d535a |
R[i] = exp(R[i]);
|
|
kusano |
7d535a |
C[i] = exp(C[i]);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
/* permute and scale the matrix */
|
|
kusano |
7d535a |
for (j = 0; j < n; j++) {
|
|
kusano |
7d535a |
for (i = colptr[j]; i < colptr[j + 1]; i++) {
|
|
kusano |
7d535a |
nzval[i] *= R[rowind[i]] * C[j];
|
|
kusano |
7d535a |
rowind[i] = perm[rowind[i]];
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
utime[EQUIL] = SuperLU_timer_() - t0;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
if ( !mc64 & equil ) {
|
|
kusano |
7d535a |
t0 = SuperLU_timer_();
|
|
kusano |
7d535a |
/* Compute row and column scalings to equilibrate the matrix A. */
|
|
kusano |
7d535a |
dgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( info1 == 0 ) {
|
|
kusano |
7d535a |
/* Equilibrate matrix A. */
|
|
kusano |
7d535a |
dlaqgs(AA, R, C, rowcnd, colcnd, amax, equed);
|
|
kusano |
7d535a |
rowequ = lsame_(equed, "R") || lsame_(equed, "B");
|
|
kusano |
7d535a |
colequ = lsame_(equed, "C") || lsame_(equed, "B");
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
utime[EQUIL] = SuperLU_timer_() - t0;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( nofact ) {
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
t0 = SuperLU_timer_();
|
|
kusano |
7d535a |
/*
|
|
kusano |
7d535a |
* Gnet column permutation vector perm_c[], according to permc_spec:
|
|
kusano |
7d535a |
* permc_spec = NATURAL: natural ordering
|
|
kusano |
7d535a |
* permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
|
|
kusano |
7d535a |
* permc_spec = MMD_ATA: minimum degree on structure of A'*A
|
|
kusano |
7d535a |
* permc_spec = COLAMD: approximate minimum degree column ordering
|
|
kusano |
7d535a |
* permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
permc_spec = options->ColPerm;
|
|
kusano |
7d535a |
if ( permc_spec != MY_PERMC && options->Fact == DOFACT )
|
|
kusano |
7d535a |
get_perm_c(permc_spec, AA, perm_c);
|
|
kusano |
7d535a |
utime[COLPERM] = SuperLU_timer_() - t0;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
t0 = SuperLU_timer_();
|
|
kusano |
7d535a |
sp_preorder(options, AA, perm_c, etree, &AC);
|
|
kusano |
7d535a |
utime[ETREE] = SuperLU_timer_() - t0;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Compute the LU factorization of A*Pc. */
|
|
kusano |
7d535a |
t0 = SuperLU_timer_();
|
|
kusano |
7d535a |
dgsitrf(options, &AC, relax, panel_size, etree, work, lwork,
|
|
kusano |
7d535a |
perm_c, perm_r, L, U, stat, info);
|
|
kusano |
7d535a |
utime[FACT] = SuperLU_timer_() - t0;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( lwork == -1 ) {
|
|
kusano |
7d535a |
mem_usage->total_needed = *info - A->ncol;
|
|
kusano |
7d535a |
return;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( options->PivotGrowth ) {
|
|
kusano |
7d535a |
if ( *info > 0 ) return;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Compute the reciprocal pivot growth factor *recip_pivot_growth. */
|
|
kusano |
7d535a |
*recip_pivot_growth = dPivotGrowth(A->ncol, AA, perm_c, L, U);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( options->ConditionNumber ) {
|
|
kusano |
7d535a |
/* Estimate the reciprocal of the condition number of A. */
|
|
kusano |
7d535a |
t0 = SuperLU_timer_();
|
|
kusano |
7d535a |
if ( notran ) {
|
|
kusano |
7d535a |
*(unsigned char *)norm = '1';
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
*(unsigned char *)norm = 'I';
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
anorm = dlangs(norm, AA);
|
|
kusano |
7d535a |
dgscon(norm, L, U, anorm, rcond, stat, &info1);
|
|
kusano |
7d535a |
utime[RCOND] = SuperLU_timer_() - t0;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( nrhs > 0 ) { /* Solve the system */
|
|
kusano |
7d535a |
double *tmp, *rhs_work;
|
|
kusano |
7d535a |
int n = A->nrow;
|
|
kusano |
7d535a |
if ( mc64 ) {
|
|
kusano |
7d535a |
if ((tmp = doubleMalloc(n)) == NULL)
|
|
kusano |
7d535a |
ABORT("SUPERLU_MALLOC fails for tmp[]");
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Scale and permute the right-hand side if equilibration
|
|
kusano |
7d535a |
and permutation from MC64 were performed. */
|
|
kusano |
7d535a |
if ( notran ) {
|
|
kusano |
7d535a |
if ( rowequ ) {
|
|
kusano |
7d535a |
for (j = 0; j < nrhs; ++j)
|
|
kusano |
7d535a |
for (i = 0; i < n; ++i)
|
|
kusano |
7d535a |
Bmat[i + j*ldb] *= R[i];
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
if ( mc64 ) {
|
|
kusano |
7d535a |
for (j = 0; j < nrhs; ++j) {
|
|
kusano |
7d535a |
rhs_work = &Bmat[j*ldb];
|
|
kusano |
7d535a |
for (i = 0; i < n; i++) tmp[perm[i]] = rhs_work[i];
|
|
kusano |
7d535a |
for (i = 0; i < n; i++) rhs_work[i] = tmp[i];
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
} else if ( colequ ) {
|
|
kusano |
7d535a |
for (j = 0; j < nrhs; ++j)
|
|
kusano |
7d535a |
for (i = 0; i < n; ++i) {
|
|
kusano |
7d535a |
Bmat[i + j*ldb] *= C[i];
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Compute the solution matrix X. */
|
|
kusano |
7d535a |
for (j = 0; j < nrhs; j++) /* Save a copy of the right hand sides */
|
|
kusano |
7d535a |
for (i = 0; i < B->nrow; i++)
|
|
kusano |
7d535a |
Xmat[i + j*ldx] = Bmat[i + j*ldb];
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
t0 = SuperLU_timer_();
|
|
kusano |
7d535a |
dgstrs (trant, L, U, perm_c, perm_r, X, stat, &info1);
|
|
kusano |
7d535a |
utime[SOLVE] = SuperLU_timer_() - t0;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Transform the solution matrix X to a solution of the original
|
|
kusano |
7d535a |
system. */
|
|
kusano |
7d535a |
if ( notran ) {
|
|
kusano |
7d535a |
if ( colequ ) {
|
|
kusano |
7d535a |
for (j = 0; j < nrhs; ++j)
|
|
kusano |
7d535a |
for (i = 0; i < n; ++i) {
|
|
kusano |
7d535a |
Xmat[i + j*ldx] *= C[i];
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
} else { /* transposed system */
|
|
kusano |
7d535a |
if ( rowequ ) {
|
|
kusano |
7d535a |
if ( mc64 ) {
|
|
kusano |
7d535a |
for (j = 0; j < nrhs; j++) {
|
|
kusano |
7d535a |
for (i = 0; i < n; i++)
|
|
kusano |
7d535a |
tmp[i] = Xmat[i + j * ldx]; /*dcopy*/
|
|
kusano |
7d535a |
for (i = 0; i < n; i++)
|
|
kusano |
7d535a |
Xmat[i + j * ldx] = R[i] * tmp[perm[i]];
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
for (j = 0; j < nrhs; ++j)
|
|
kusano |
7d535a |
for (i = 0; i < A->nrow; ++i) {
|
|
kusano |
7d535a |
Xmat[i + j*ldx] *= R[i];
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( mc64 ) SUPERLU_FREE(tmp);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
} /* end if nrhs > 0 */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( options->ConditionNumber ) {
|
|
kusano |
7d535a |
/* Set INFO = A->ncol+1 if the matrix is singular to working precision. */
|
|
kusano |
7d535a |
if ( *rcond < dlamch_("E") && *info == 0) *info = A->ncol + 1;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if (perm) SUPERLU_FREE(perm);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( nofact ) {
|
|
kusano |
7d535a |
ilu_dQuerySpace(L, U, mem_usage);
|
|
kusano |
7d535a |
Destroy_CompCol_Permuted(&AC);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
if ( A->Stype == SLU_NR ) {
|
|
kusano |
7d535a |
Destroy_SuperMatrix_Store(AA);
|
|
kusano |
7d535a |
SUPERLU_FREE(AA);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
}
|