kusano 7d535a
kusano 7d535a
/*! @file sgsisx.c
kusano 7d535a
 * \brief Computes approximate solutions of linear equations A*X=B or A'*X=B
kusano 7d535a
 *
kusano 7d535a
 * 
kusano 7d535a
 * -- SuperLU routine (version 4.1) --
kusano 7d535a
 * Lawrence Berkeley National Laboratory.
kusano 7d535a
 * November, 2010
kusano 7d535a
 * 
kusano 7d535a
 */
kusano 7d535a
#include "slu_sdefs.h"
kusano 7d535a
kusano 7d535a
/*! \brief
kusano 7d535a
 *
kusano 7d535a
 * 
kusano 7d535a
 * Purpose
kusano 7d535a
 * =======
kusano 7d535a
 *
kusano 7d535a
 * SGSISX computes approximate solutions of linear equations
kusano 7d535a
 * A*X=B or A'*X=B, using the ILU factorization from sgsitrf().
kusano 7d535a
 * An estimation of the condition number is provided. 
kusano 7d535a
 * The routine performs the following steps:
kusano 7d535a
 *
kusano 7d535a
 *   1. If A is stored column-wise (A->Stype = SLU_NC):
kusano 7d535a
 *  
kusano 7d535a
 *	1.1. If options->Equil = YES or options->RowPerm = LargeDiag, scaling
kusano 7d535a
 *	     factors are computed to equilibrate the system:
kusano 7d535a
 *	     options->Trans = NOTRANS:
kusano 7d535a
 *		 diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
kusano 7d535a
 *	     options->Trans = TRANS:
kusano 7d535a
 *		 (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
kusano 7d535a
 *	     options->Trans = CONJ:
kusano 7d535a
 *		 (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
kusano 7d535a
 *	     Whether or not the system will be equilibrated depends on the
kusano 7d535a
 *	     scaling of the matrix A, but if equilibration is used, A is
kusano 7d535a
 *	     overwritten by diag(R)*A*diag(C) and B by diag(R)*B
kusano 7d535a
 *	     (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans
kusano 7d535a
 *	     = TRANS or CONJ).
kusano 7d535a
 *
kusano 7d535a
 *	1.2. Permute columns of A, forming A*Pc, where Pc is a permutation
kusano 7d535a
 *	     matrix that usually preserves sparsity.
kusano 7d535a
 *	     For more details of this step, see sp_preorder.c.
kusano 7d535a
 *
kusano 7d535a
 *	1.3. If options->Fact != FACTORED, the LU decomposition is used to
kusano 7d535a
 *	     factor the matrix A (after equilibration if options->Equil = YES)
kusano 7d535a
 *	     as Pr*A*Pc = L*U, with Pr determined by partial pivoting.
kusano 7d535a
 *
kusano 7d535a
 *	1.4. Compute the reciprocal pivot growth factor.
kusano 7d535a
 *
kusano 7d535a
 *	1.5. If some U(i,i) = 0, so that U is exactly singular, then the
kusano 7d535a
 *	     routine fills a small number on the diagonal entry, that is
kusano 7d535a
 *		U(i,i) = ||A(:,i)||_oo * options->ILU_FillTol ** (1 - i / n),
kusano 7d535a
 *	     and info will be increased by 1. The factored form of A is used
kusano 7d535a
 *	     to estimate the condition number of the preconditioner. If the
kusano 7d535a
 *	     reciprocal of the condition number is less than machine precision,
kusano 7d535a
 *	     info = A->ncol+1 is returned as a warning, but the routine still
kusano 7d535a
 *	     goes on to solve for X.
kusano 7d535a
 *
kusano 7d535a
 *	1.6. The system of equations is solved for X using the factored form
kusano 7d535a
 *	     of A.
kusano 7d535a
 *
kusano 7d535a
 *	1.7. options->IterRefine is not used
kusano 7d535a
 *
kusano 7d535a
 *	1.8. If equilibration was used, the matrix X is premultiplied by
kusano 7d535a
 *	     diag(C) (if options->Trans = NOTRANS) or diag(R)
kusano 7d535a
 *	     (if options->Trans = TRANS or CONJ) so that it solves the
kusano 7d535a
 *	     original system before equilibration.
kusano 7d535a
 *
kusano 7d535a
 *	1.9. options for ILU only
kusano 7d535a
 *	     1) If options->RowPerm = LargeDiag, MC64 is used to scale and
kusano 7d535a
 *		permute the matrix to an I-matrix, that is Pr*Dr*A*Dc has
kusano 7d535a
 *		entries of modulus 1 on the diagonal and off-diagonal entries
kusano 7d535a
 *		of modulus at most 1. If MC64 fails, sgsequ() is used to
kusano 7d535a
 *		equilibrate the system.
kusano 7d535a
 *              ( Default: LargeDiag )
kusano 7d535a
 *	     2) options->ILU_DropTol = tau is the threshold for dropping.
kusano 7d535a
 *		For L, it is used directly (for the whole row in a supernode);
kusano 7d535a
 *		For U, ||A(:,i)||_oo * tau is used as the threshold
kusano 7d535a
 *	        for the	i-th column.
kusano 7d535a
 *		If a secondary dropping rule is required, tau will
kusano 7d535a
 *	        also be used to compute the second threshold.
kusano 7d535a
 *              ( Default: 1e-4 )
kusano 7d535a
 *	     3) options->ILU_FillFactor = gamma, used as the initial guess
kusano 7d535a
 *		of memory growth.
kusano 7d535a
 *		If a secondary dropping rule is required, it will also
kusano 7d535a
 *              be used as an upper bound of the memory.
kusano 7d535a
 *              ( Default: 10 )
kusano 7d535a
 *	     4) options->ILU_DropRule specifies the dropping rule.
kusano 7d535a
 *		Option	      Meaning
kusano 7d535a
 *		======	      ===========
kusano 7d535a
 *		DROP_BASIC:   Basic dropping rule, supernodal based ILUTP(tau).
kusano 7d535a
 *		DROP_PROWS:   Supernodal based ILUTP(p,tau), p = gamma*nnz(A)/n.
kusano 7d535a
 *		DROP_COLUMN:  Variant of ILUTP(p,tau), for j-th column,
kusano 7d535a
 *			      p = gamma * nnz(A(:,j)).
kusano 7d535a
 *		DROP_AREA:    Variation of ILUTP, for j-th column, use
kusano 7d535a
 *			      nnz(F(:,1:j)) / nnz(A(:,1:j)) to control memory.
kusano 7d535a
 *		DROP_DYNAMIC: Modify the threshold tau during factorization:
kusano 7d535a
 *			      If nnz(L(:,1:j)) / nnz(A(:,1:j)) > gamma
kusano 7d535a
 *				  tau_L(j) := MIN(tau_0, tau_L(j-1) * 2);
kusano 7d535a
 *			      Otherwise
kusano 7d535a
 *				  tau_L(j) := MAX(tau_0, tau_L(j-1) / 2);
kusano 7d535a
 *			      tau_U(j) uses the similar rule.
kusano 7d535a
 *			      NOTE: the thresholds used by L and U are separate.
kusano 7d535a
 *		DROP_INTERP:  Compute the second dropping threshold by
kusano 7d535a
 *			      interpolation instead of sorting (default).
kusano 7d535a
 *			      In this case, the actual fill ratio is not
kusano 7d535a
 *			      guaranteed smaller than gamma.
kusano 7d535a
 *		DROP_PROWS, DROP_COLUMN and DROP_AREA are mutually exclusive.
kusano 7d535a
 *		( Default: DROP_BASIC | DROP_AREA )
kusano 7d535a
 *	     5) options->ILU_Norm is the criterion of measuring the magnitude
kusano 7d535a
 *		of a row in a supernode of L. ( Default is INF_NORM )
kusano 7d535a
 *		options->ILU_Norm	RowSize(x[1:n])
kusano 7d535a
 *		=================	===============
kusano 7d535a
 *		ONE_NORM		||x||_1 / n
kusano 7d535a
 *		TWO_NORM		||x||_2 / sqrt(n)
kusano 7d535a
 *		INF_NORM		max{|x[i]|}
kusano 7d535a
 *	     6) options->ILU_MILU specifies the type of MILU's variation.
kusano 7d535a
 *		= SILU: do not perform Modified ILU;
kusano 7d535a
 *		= SMILU_1 (not recommended):
kusano 7d535a
 *		    U(i,i) := U(i,i) + sum(dropped entries);
kusano 7d535a
 *		= SMILU_2:
kusano 7d535a
 *		    U(i,i) := U(i,i) + SGN(U(i,i)) * sum(dropped entries);
kusano 7d535a
 *		= SMILU_3:
kusano 7d535a
 *		    U(i,i) := U(i,i) + SGN(U(i,i)) * sum(|dropped entries|);
kusano 7d535a
 *		NOTE: Even SMILU_1 does not preserve the column sum because of
kusano 7d535a
 *		late dropping.
kusano 7d535a
 *              ( Default: SILU )
kusano 7d535a
 *	     7) options->ILU_FillTol is used as the perturbation when
kusano 7d535a
 *		encountering zero pivots. If some U(i,i) = 0, so that U is
kusano 7d535a
 *		exactly singular, then
kusano 7d535a
 *		   U(i,i) := ||A(:,i)|| * options->ILU_FillTol ** (1 - i / n).
kusano 7d535a
 *              ( Default: 1e-2 )
kusano 7d535a
 *
kusano 7d535a
 *   2. If A is stored row-wise (A->Stype = SLU_NR), apply the above algorithm
kusano 7d535a
 *	to the transpose of A:
kusano 7d535a
 *
kusano 7d535a
 *	2.1. If options->Equil = YES or options->RowPerm = LargeDiag, scaling
kusano 7d535a
 *	     factors are computed to equilibrate the system:
kusano 7d535a
 *	     options->Trans = NOTRANS:
kusano 7d535a
 *		 diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
kusano 7d535a
 *	     options->Trans = TRANS:
kusano 7d535a
 *		 (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
kusano 7d535a
 *	     options->Trans = CONJ:
kusano 7d535a
 *		 (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
kusano 7d535a
 *	     Whether or not the system will be equilibrated depends on the
kusano 7d535a
 *	     scaling of the matrix A, but if equilibration is used, A' is
kusano 7d535a
 *	     overwritten by diag(R)*A'*diag(C) and B by diag(R)*B
kusano 7d535a
 *	     (if trans='N') or diag(C)*B (if trans = 'T' or 'C').
kusano 7d535a
 *
kusano 7d535a
 *	2.2. Permute columns of transpose(A) (rows of A),
kusano 7d535a
 *	     forming transpose(A)*Pc, where Pc is a permutation matrix that
kusano 7d535a
 *	     usually preserves sparsity.
kusano 7d535a
 *	     For more details of this step, see sp_preorder.c.
kusano 7d535a
 *
kusano 7d535a
 *	2.3. If options->Fact != FACTORED, the LU decomposition is used to
kusano 7d535a
 *	     factor the transpose(A) (after equilibration if
kusano 7d535a
 *	     options->Equil = YES) as Pr*transpose(A)*Pc = L*U with the
kusano 7d535a
 *	     permutation Pr determined by partial pivoting.
kusano 7d535a
 *
kusano 7d535a
 *	2.4. Compute the reciprocal pivot growth factor.
kusano 7d535a
 *
kusano 7d535a
 *	2.5. If some U(i,i) = 0, so that U is exactly singular, then the
kusano 7d535a
 *	     routine fills a small number on the diagonal entry, that is
kusano 7d535a
 *		 U(i,i) = ||A(:,i)||_oo * options->ILU_FillTol ** (1 - i / n).
kusano 7d535a
 *	     And info will be increased by 1. The factored form of A is used
kusano 7d535a
 *	     to estimate the condition number of the preconditioner. If the
kusano 7d535a
 *	     reciprocal of the condition number is less than machine precision,
kusano 7d535a
 *	     info = A->ncol+1 is returned as a warning, but the routine still
kusano 7d535a
 *	     goes on to solve for X.
kusano 7d535a
 *
kusano 7d535a
 *	2.6. The system of equations is solved for X using the factored form
kusano 7d535a
 *	     of transpose(A).
kusano 7d535a
 *
kusano 7d535a
 *	2.7. options->IterRefine is not used.
kusano 7d535a
 *
kusano 7d535a
 *	2.8. If equilibration was used, the matrix X is premultiplied by
kusano 7d535a
 *	     diag(C) (if options->Trans = NOTRANS) or diag(R)
kusano 7d535a
 *	     (if options->Trans = TRANS or CONJ) so that it solves the
kusano 7d535a
 *	     original system before equilibration.
kusano 7d535a
 *
kusano 7d535a
 *   See supermatrix.h for the definition of 'SuperMatrix' structure.
kusano 7d535a
 *
kusano 7d535a
 * Arguments
kusano 7d535a
 * =========
kusano 7d535a
 *
kusano 7d535a
 * options (input) superlu_options_t*
kusano 7d535a
 *	   The structure defines the input parameters to control
kusano 7d535a
 *	   how the LU decomposition will be performed and how the
kusano 7d535a
 *	   system will be solved.
kusano 7d535a
 *
kusano 7d535a
 * A	   (input/output) SuperMatrix*
kusano 7d535a
 *	   Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
kusano 7d535a
 *	   of the linear equations is A->nrow. Currently, the type of A can be:
kusano 7d535a
 *	   Stype = SLU_NC or SLU_NR, Dtype = SLU_S, Mtype = SLU_GE.
kusano 7d535a
 *	   In the future, more general A may be handled.
kusano 7d535a
 *
kusano 7d535a
 *	   On entry, If options->Fact = FACTORED and equed is not 'N',
kusano 7d535a
 *	   then A must have been equilibrated by the scaling factors in
kusano 7d535a
 *	   R and/or C.
kusano 7d535a
 *	   On exit, A is not modified
kusano 7d535a
 *         if options->Equil = NO, or
kusano 7d535a
 *         if options->Equil = YES but equed = 'N' on exit, or
kusano 7d535a
 *         if options->RowPerm = NO.
kusano 7d535a
 *
kusano 7d535a
 *	   Otherwise, if options->Equil = YES and equed is not 'N',
kusano 7d535a
 *	   A is scaled as follows:
kusano 7d535a
 *	   If A->Stype = SLU_NC:
kusano 7d535a
 *	     equed = 'R':  A := diag(R) * A
kusano 7d535a
 *	     equed = 'C':  A := A * diag(C)
kusano 7d535a
 *	     equed = 'B':  A := diag(R) * A * diag(C).
kusano 7d535a
 *	   If A->Stype = SLU_NR:
kusano 7d535a
 *	     equed = 'R':  transpose(A) := diag(R) * transpose(A)
kusano 7d535a
 *	     equed = 'C':  transpose(A) := transpose(A) * diag(C)
kusano 7d535a
 *	     equed = 'B':  transpose(A) := diag(R) * transpose(A) * diag(C).
kusano 7d535a
 *
kusano 7d535a
 *         If options->RowPerm = LargeDiag, MC64 is used to scale and permute
kusano 7d535a
 *            the matrix to an I-matrix, that is A is modified as follows:
kusano 7d535a
 *            P*Dr*A*Dc has entries of modulus 1 on the diagonal and 
kusano 7d535a
 *            off-diagonal entries of modulus at most 1. P is a permutation
kusano 7d535a
 *            obtained from MC64.
kusano 7d535a
 *            If MC64 fails, sgsequ() is used to equilibrate the system,
kusano 7d535a
 *            and A is scaled as above, there is no permutation involved.
kusano 7d535a
 *
kusano 7d535a
 * perm_c  (input/output) int*
kusano 7d535a
 *	   If A->Stype = SLU_NC, Column permutation vector of size A->ncol,
kusano 7d535a
 *	   which defines the permutation matrix Pc; perm_c[i] = j means
kusano 7d535a
 *	   column i of A is in position j in A*Pc.
kusano 7d535a
 *	   On exit, perm_c may be overwritten by the product of the input
kusano 7d535a
 *	   perm_c and a permutation that postorders the elimination tree
kusano 7d535a
 *	   of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree
kusano 7d535a
 *	   is already in postorder.
kusano 7d535a
 *
kusano 7d535a
 *	   If A->Stype = SLU_NR, column permutation vector of size A->nrow,
kusano 7d535a
 *	   which describes permutation of columns of transpose(A) 
kusano 7d535a
 *	   (rows of A) as described above.
kusano 7d535a
 *
kusano 7d535a
 * perm_r  (input/output) int*
kusano 7d535a
 *	   If A->Stype = SLU_NC, row permutation vector of size A->nrow, 
kusano 7d535a
 *	   which defines the permutation matrix Pr, and is determined
kusano 7d535a
 *	   by partial pivoting.  perm_r[i] = j means row i of A is in 
kusano 7d535a
 *	   position j in Pr*A.
kusano 7d535a
 *
kusano 7d535a
 *	   If A->Stype = SLU_NR, permutation vector of size A->ncol, which
kusano 7d535a
 *	   determines permutation of rows of transpose(A)
kusano 7d535a
 *	   (columns of A) as described above.
kusano 7d535a
 *
kusano 7d535a
 *	   If options->Fact = SamePattern_SameRowPerm, the pivoting routine
kusano 7d535a
 *	   will try to use the input perm_r, unless a certain threshold
kusano 7d535a
 *	   criterion is violated. In that case, perm_r is overwritten by a
kusano 7d535a
 *	   new permutation determined by partial pivoting or diagonal
kusano 7d535a
 *	   threshold pivoting.
kusano 7d535a
 *	   Otherwise, perm_r is output argument.
kusano 7d535a
 *
kusano 7d535a
 * etree   (input/output) int*,  dimension (A->ncol)
kusano 7d535a
 *	   Elimination tree of Pc'*A'*A*Pc.
kusano 7d535a
 *	   If options->Fact != FACTORED and options->Fact != DOFACT,
kusano 7d535a
 *	   etree is an input argument, otherwise it is an output argument.
kusano 7d535a
 *	   Note: etree is a vector of parent pointers for a forest whose
kusano 7d535a
 *	   vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
kusano 7d535a
 *
kusano 7d535a
 * equed   (input/output) char*
kusano 7d535a
 *	   Specifies the form of equilibration that was done.
kusano 7d535a
 *	   = 'N': No equilibration.
kusano 7d535a
 *	   = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
kusano 7d535a
 *	   = 'C': Column equilibration, i.e., A was postmultiplied by diag(C).
kusano 7d535a
 *	   = 'B': Both row and column equilibration, i.e., A was replaced 
kusano 7d535a
 *		  by diag(R)*A*diag(C).
kusano 7d535a
 *	   If options->Fact = FACTORED, equed is an input argument,
kusano 7d535a
 *	   otherwise it is an output argument.
kusano 7d535a
 *
kusano 7d535a
 * R	   (input/output) float*, dimension (A->nrow)
kusano 7d535a
 *	   The row scale factors for A or transpose(A).
kusano 7d535a
 *	   If equed = 'R' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
kusano 7d535a
 *	       (if A->Stype = SLU_NR) is multiplied on the left by diag(R).
kusano 7d535a
 *	   If equed = 'N' or 'C', R is not accessed.
kusano 7d535a
 *	   If options->Fact = FACTORED, R is an input argument,
kusano 7d535a
 *	       otherwise, R is output.
kusano 7d535a
 *	   If options->Fact = FACTORED and equed = 'R' or 'B', each element
kusano 7d535a
 *	       of R must be positive.
kusano 7d535a
 *
kusano 7d535a
 * C	   (input/output) float*, dimension (A->ncol)
kusano 7d535a
 *	   The column scale factors for A or transpose(A).
kusano 7d535a
 *	   If equed = 'C' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
kusano 7d535a
 *	       (if A->Stype = SLU_NR) is multiplied on the right by diag(C).
kusano 7d535a
 *	   If equed = 'N' or 'R', C is not accessed.
kusano 7d535a
 *	   If options->Fact = FACTORED, C is an input argument,
kusano 7d535a
 *	       otherwise, C is output.
kusano 7d535a
 *	   If options->Fact = FACTORED and equed = 'C' or 'B', each element
kusano 7d535a
 *	       of C must be positive.
kusano 7d535a
 *
kusano 7d535a
 * L	   (output) SuperMatrix*
kusano 7d535a
 *	   The factor L from the factorization
kusano 7d535a
 *	       Pr*A*Pc=L*U		(if A->Stype = SLU_NC) or
kusano 7d535a
 *	       Pr*transpose(A)*Pc=L*U	(if A->Stype = SLU_NR).
kusano 7d535a
 *	   Uses compressed row subscripts storage for supernodes, i.e.,
kusano 7d535a
 *	   L has types: Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
kusano 7d535a
 *
kusano 7d535a
 * U	   (output) SuperMatrix*
kusano 7d535a
 *	   The factor U from the factorization
kusano 7d535a
 *	       Pr*A*Pc=L*U		(if A->Stype = SLU_NC) or
kusano 7d535a
 *	       Pr*transpose(A)*Pc=L*U	(if A->Stype = SLU_NR).
kusano 7d535a
 *	   Uses column-wise storage scheme, i.e., U has types:
kusano 7d535a
 *	   Stype = SLU_NC, Dtype = SLU_S, Mtype = SLU_TRU.
kusano 7d535a
 *
kusano 7d535a
 * work    (workspace/output) void*, size (lwork) (in bytes)
kusano 7d535a
 *	   User supplied workspace, should be large enough
kusano 7d535a
 *	   to hold data structures for factors L and U.
kusano 7d535a
 *	   On exit, if fact is not 'F', L and U point to this array.
kusano 7d535a
 *
kusano 7d535a
 * lwork   (input) int
kusano 7d535a
 *	   Specifies the size of work array in bytes.
kusano 7d535a
 *	   = 0:  allocate space internally by system malloc;
kusano 7d535a
 *	   > 0:  use user-supplied work array of length lwork in bytes,
kusano 7d535a
 *		 returns error if space runs out.
kusano 7d535a
 *	   = -1: the routine guesses the amount of space needed without
kusano 7d535a
 *		 performing the factorization, and returns it in
kusano 7d535a
 *		 mem_usage->total_needed; no other side effects.
kusano 7d535a
 *
kusano 7d535a
 *	   See argument 'mem_usage' for memory usage statistics.
kusano 7d535a
 *
kusano 7d535a
 * B	   (input/output) SuperMatrix*
kusano 7d535a
 *	   B has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE.
kusano 7d535a
 *	   On entry, the right hand side matrix.
kusano 7d535a
 *	   If B->ncol = 0, only LU decomposition is performed, the triangular
kusano 7d535a
 *			   solve is skipped.
kusano 7d535a
 *	   On exit,
kusano 7d535a
 *	      if equed = 'N', B is not modified; otherwise
kusano 7d535a
 *	      if A->Stype = SLU_NC:
kusano 7d535a
 *		 if options->Trans = NOTRANS and equed = 'R' or 'B',
kusano 7d535a
 *		    B is overwritten by diag(R)*B;
kusano 7d535a
 *		 if options->Trans = TRANS or CONJ and equed = 'C' or 'B',
kusano 7d535a
 *		    B is overwritten by diag(C)*B;
kusano 7d535a
 *	      if A->Stype = SLU_NR:
kusano 7d535a
 *		 if options->Trans = NOTRANS and equed = 'C' or 'B',
kusano 7d535a
 *		    B is overwritten by diag(C)*B;
kusano 7d535a
 *		 if options->Trans = TRANS or CONJ and equed = 'R' or 'B',
kusano 7d535a
 *		    B is overwritten by diag(R)*B.
kusano 7d535a
 *
kusano 7d535a
 *         If options->RowPerm = LargeDiag, MC64 is used to scale and permute
kusano 7d535a
 *            the matrix A to an I-matrix. Then, in addition to the scaling
kusano 7d535a
 *            above, B is further permuted by P*B if options->Trans = NOTRANS,
kusano 7d535a
 *            where P is obtained from MC64.
kusano 7d535a
 *
kusano 7d535a
 * X	   (output) SuperMatrix*
kusano 7d535a
 *	   X has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE.
kusano 7d535a
 *	   If info = 0 or info = A->ncol+1, X contains the solution matrix
kusano 7d535a
 *	   to the original system of equations. Note that A and B are modified
kusano 7d535a
 *	   on exit if equed is not 'N', and the solution to the equilibrated
kusano 7d535a
 *	   system is inv(diag(C))*X if options->Trans = NOTRANS and
kusano 7d535a
 *	   equed = 'C' or 'B', or inv(diag(R))*X if options->Trans = 'T' or 'C'
kusano 7d535a
 *	   and equed = 'R' or 'B'.
kusano 7d535a
 *
kusano 7d535a
 * recip_pivot_growth (output) float*
kusano 7d535a
 *	   The reciprocal pivot growth factor max_j( norm(A_j)/norm(U_j) ).
kusano 7d535a
 *	   The infinity norm is used. If recip_pivot_growth is much less
kusano 7d535a
 *	   than 1, the stability of the LU factorization could be poor.
kusano 7d535a
 *
kusano 7d535a
 * rcond   (output) float*
kusano 7d535a
 *	   The estimate of the reciprocal condition number of the matrix A
kusano 7d535a
 *	   after equilibration (if done). If rcond is less than the machine
kusano 7d535a
 *	   precision (in particular, if rcond = 0), the matrix is singular
kusano 7d535a
 *	   to working precision. This condition is indicated by a return
kusano 7d535a
 *	   code of info > 0.
kusano 7d535a
 *
kusano 7d535a
 * mem_usage (output) mem_usage_t*
kusano 7d535a
 *	   Record the memory usage statistics, consisting of following fields:
kusano 7d535a
 *	   - for_lu (float)
kusano 7d535a
 *	     The amount of space used in bytes for L\U data structures.
kusano 7d535a
 *	   - total_needed (float)
kusano 7d535a
 *	     The amount of space needed in bytes to perform factorization.
kusano 7d535a
 *	   - expansions (int)
kusano 7d535a
 *	     The number of memory expansions during the LU factorization.
kusano 7d535a
 *
kusano 7d535a
 * stat   (output) SuperLUStat_t*
kusano 7d535a
 *	  Record the statistics on runtime and floating-point operation count.
kusano 7d535a
 *	  See slu_util.h for the definition of 'SuperLUStat_t'.
kusano 7d535a
 *
kusano 7d535a
 * info    (output) int*
kusano 7d535a
 *	   = 0: successful exit
kusano 7d535a
 *	   < 0: if info = -i, the i-th argument had an illegal value
kusano 7d535a
 *	   > 0: if info = i, and i is
kusano 7d535a
 *		<= A->ncol: number of zero pivots. They are replaced by small
kusano 7d535a
 *		      entries due to options->ILU_FillTol.
kusano 7d535a
 *		= A->ncol+1: U is nonsingular, but RCOND is less than machine
kusano 7d535a
 *		      precision, meaning that the matrix is singular to
kusano 7d535a
 *		      working precision. Nevertheless, the solution and
kusano 7d535a
 *		      error bounds are computed because there are a number
kusano 7d535a
 *		      of situations where the computed solution can be more
kusano 7d535a
 *		      accurate than the value of RCOND would suggest.
kusano 7d535a
 *		> A->ncol+1: number of bytes allocated when memory allocation
kusano 7d535a
 *		      failure occurred, plus A->ncol.
kusano 7d535a
 * 
kusano 7d535a
 */
kusano 7d535a
kusano 7d535a
void
kusano 7d535a
sgsisx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
kusano 7d535a
       int *etree, char *equed, float *R, float *C,
kusano 7d535a
       SuperMatrix *L, SuperMatrix *U, void *work, int lwork,
kusano 7d535a
       SuperMatrix *B, SuperMatrix *X,
kusano 7d535a
       float *recip_pivot_growth, float *rcond,
kusano 7d535a
       mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info)
kusano 7d535a
{
kusano 7d535a
kusano 7d535a
    DNformat  *Bstore, *Xstore;
kusano 7d535a
    float    *Bmat, *Xmat;
kusano 7d535a
    int       ldb, ldx, nrhs;
kusano 7d535a
    SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/
kusano 7d535a
    SuperMatrix AC; /* Matrix postmultiplied by Pc */
kusano 7d535a
    int       colequ, equil, nofact, notran, rowequ, permc_spec, mc64;
kusano 7d535a
    trans_t   trant;
kusano 7d535a
    char      norm[1];
kusano 7d535a
    int       i, j, info1;
kusano 7d535a
    float    amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin;
kusano 7d535a
    int       relax, panel_size;
kusano 7d535a
    float    diag_pivot_thresh;
kusano 7d535a
    double    t0;      /* temporary time */
kusano 7d535a
    double    *utime;
kusano 7d535a
kusano 7d535a
    int *perm = NULL;
kusano 7d535a
kusano 7d535a
    /* External functions */
kusano 7d535a
    extern float slangs(char *, SuperMatrix *);
kusano 7d535a
kusano 7d535a
    Bstore = B->Store;
kusano 7d535a
    Xstore = X->Store;
kusano 7d535a
    Bmat   = Bstore->nzval;
kusano 7d535a
    Xmat   = Xstore->nzval;
kusano 7d535a
    ldb    = Bstore->lda;
kusano 7d535a
    ldx    = Xstore->lda;
kusano 7d535a
    nrhs   = B->ncol;
kusano 7d535a
kusano 7d535a
    *info = 0;
kusano 7d535a
    nofact = (options->Fact != FACTORED);
kusano 7d535a
    equil = (options->Equil == YES);
kusano 7d535a
    notran = (options->Trans == NOTRANS);
kusano 7d535a
    mc64 = (options->RowPerm == LargeDiag);
kusano 7d535a
    if ( nofact ) {
kusano 7d535a
	*(unsigned char *)equed = 'N';
kusano 7d535a
	rowequ = FALSE;
kusano 7d535a
	colequ = FALSE;
kusano 7d535a
    } else {
kusano 7d535a
	rowequ = lsame_(equed, "R") || lsame_(equed, "B");
kusano 7d535a
	colequ = lsame_(equed, "C") || lsame_(equed, "B");
kusano 7d535a
	smlnum = slamch_("Safe minimum");
kusano 7d535a
	bignum = 1. / smlnum;
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    /* Test the input parameters */
kusano 7d535a
    if (!nofact && options->Fact != DOFACT && options->Fact != SamePattern &&
kusano 7d535a
	options->Fact != SamePattern_SameRowPerm &&
kusano 7d535a
	!notran && options->Trans != TRANS && options->Trans != CONJ &&
kusano 7d535a
	!equil && options->Equil != NO)
kusano 7d535a
	*info = -1;
kusano 7d535a
    else if ( A->nrow != A->ncol || A->nrow < 0 ||
kusano 7d535a
	      (A->Stype != SLU_NC && A->Stype != SLU_NR) ||
kusano 7d535a
	      A->Dtype != SLU_S || A->Mtype != SLU_GE )
kusano 7d535a
	*info = -2;
kusano 7d535a
    else if (options->Fact == FACTORED &&
kusano 7d535a
	     !(rowequ || colequ || lsame_(equed, "N")))
kusano 7d535a
	*info = -6;
kusano 7d535a
    else {
kusano 7d535a
	if (rowequ) {
kusano 7d535a
	    rcmin = bignum;
kusano 7d535a
	    rcmax = 0.;
kusano 7d535a
	    for (j = 0; j < A->nrow; ++j) {
kusano 7d535a
		rcmin = SUPERLU_MIN(rcmin, R[j]);
kusano 7d535a
		rcmax = SUPERLU_MAX(rcmax, R[j]);
kusano 7d535a
	    }
kusano 7d535a
	    if (rcmin <= 0.) *info = -7;
kusano 7d535a
	    else if ( A->nrow > 0)
kusano 7d535a
		rowcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum);
kusano 7d535a
	    else rowcnd = 1.;
kusano 7d535a
	}
kusano 7d535a
	if (colequ && *info == 0) {
kusano 7d535a
	    rcmin = bignum;
kusano 7d535a
	    rcmax = 0.;
kusano 7d535a
	    for (j = 0; j < A->nrow; ++j) {
kusano 7d535a
		rcmin = SUPERLU_MIN(rcmin, C[j]);
kusano 7d535a
		rcmax = SUPERLU_MAX(rcmax, C[j]);
kusano 7d535a
	    }
kusano 7d535a
	    if (rcmin <= 0.) *info = -8;
kusano 7d535a
	    else if (A->nrow > 0)
kusano 7d535a
		colcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum);
kusano 7d535a
	    else colcnd = 1.;
kusano 7d535a
	}
kusano 7d535a
	if (*info == 0) {
kusano 7d535a
	    if ( lwork < -1 ) *info = -12;
kusano 7d535a
	    else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
kusano 7d535a
		      B->Stype != SLU_DN || B->Dtype != SLU_S || 
kusano 7d535a
		      B->Mtype != SLU_GE )
kusano 7d535a
		*info = -13;
kusano 7d535a
	    else if ( X->ncol < 0 || Xstore->lda < SUPERLU_MAX(0, A->nrow) ||
kusano 7d535a
		      (B->ncol != 0 && B->ncol != X->ncol) ||
kusano 7d535a
		      X->Stype != SLU_DN ||
kusano 7d535a
		      X->Dtype != SLU_S || X->Mtype != SLU_GE )
kusano 7d535a
		*info = -14;
kusano 7d535a
	}
kusano 7d535a
    }
kusano 7d535a
    if (*info != 0) {
kusano 7d535a
	i = -(*info);
kusano 7d535a
	xerbla_("sgsisx", &i);
kusano 7d535a
	return;
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    /* Initialization for factor parameters */
kusano 7d535a
    panel_size = sp_ienv(1);
kusano 7d535a
    relax      = sp_ienv(2);
kusano 7d535a
    diag_pivot_thresh = options->DiagPivotThresh;
kusano 7d535a
kusano 7d535a
    utime = stat->utime;
kusano 7d535a
kusano 7d535a
    /* Convert A to SLU_NC format when necessary. */
kusano 7d535a
    if ( A->Stype == SLU_NR ) {
kusano 7d535a
	NRformat *Astore = A->Store;
kusano 7d535a
	AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
kusano 7d535a
	sCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz,
kusano 7d535a
			       Astore->nzval, Astore->colind, Astore->rowptr,
kusano 7d535a
			       SLU_NC, A->Dtype, A->Mtype);
kusano 7d535a
	if ( notran ) { /* Reverse the transpose argument. */
kusano 7d535a
	    trant = TRANS;
kusano 7d535a
	    notran = 0;
kusano 7d535a
	} else {
kusano 7d535a
	    trant = NOTRANS;
kusano 7d535a
	    notran = 1;
kusano 7d535a
	}
kusano 7d535a
    } else { /* A->Stype == SLU_NC */
kusano 7d535a
	trant = options->Trans;
kusano 7d535a
	AA = A;
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    if ( nofact ) {
kusano 7d535a
	register int i, j;
kusano 7d535a
	NCformat *Astore = AA->Store;
kusano 7d535a
	int nnz = Astore->nnz;
kusano 7d535a
	int *colptr = Astore->colptr;
kusano 7d535a
	int *rowind = Astore->rowind;
kusano 7d535a
	float *nzval = (float *)Astore->nzval;
kusano 7d535a
	int n = AA->nrow;
kusano 7d535a
kusano 7d535a
	if ( mc64 ) {
kusano 7d535a
	    *equed = 'B';
kusano 7d535a
    	    /*rowequ = colequ = 1;*/
kusano 7d535a
	    t0 = SuperLU_timer_();
kusano 7d535a
	    if ((perm = intMalloc(n)) == NULL)
kusano 7d535a
		ABORT("SUPERLU_MALLOC fails for perm[]");
kusano 7d535a
kusano 7d535a
	    info1 = sldperm(5, n, nnz, colptr, rowind, nzval, perm, R, C);
kusano 7d535a
kusano 7d535a
	    if (info1 > 0) { /* MC64 fails, call sgsequ() later */
kusano 7d535a
		mc64 = 0;
kusano 7d535a
		SUPERLU_FREE(perm);
kusano 7d535a
		perm = NULL;
kusano 7d535a
	    } else {
kusano 7d535a
	        rowequ = colequ = 1;
kusano 7d535a
		for (i = 0; i < n; i++) {
kusano 7d535a
		    R[i] = exp(R[i]);
kusano 7d535a
		    C[i] = exp(C[i]);
kusano 7d535a
		}
kusano 7d535a
		/* permute and scale the matrix */
kusano 7d535a
		for (j = 0; j < n; j++) {
kusano 7d535a
		    for (i = colptr[j]; i < colptr[j + 1]; i++) {
kusano 7d535a
			nzval[i] *= R[rowind[i]] * C[j];
kusano 7d535a
			rowind[i] = perm[rowind[i]];
kusano 7d535a
		    }
kusano 7d535a
		}
kusano 7d535a
	    }
kusano 7d535a
	    utime[EQUIL] = SuperLU_timer_() - t0;
kusano 7d535a
	}
kusano 7d535a
	if ( !mc64 & equil ) {
kusano 7d535a
	    t0 = SuperLU_timer_();
kusano 7d535a
	    /* Compute row and column scalings to equilibrate the matrix A. */
kusano 7d535a
	    sgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1);
kusano 7d535a
kusano 7d535a
	    if ( info1 == 0 ) {
kusano 7d535a
		/* Equilibrate matrix A. */
kusano 7d535a
		slaqgs(AA, R, C, rowcnd, colcnd, amax, equed);
kusano 7d535a
		rowequ = lsame_(equed, "R") || lsame_(equed, "B");
kusano 7d535a
		colequ = lsame_(equed, "C") || lsame_(equed, "B");
kusano 7d535a
	    }
kusano 7d535a
	    utime[EQUIL] = SuperLU_timer_() - t0;
kusano 7d535a
	}
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
kusano 7d535a
    if ( nofact ) {
kusano 7d535a
	
kusano 7d535a
	t0 = SuperLU_timer_();
kusano 7d535a
	/*
kusano 7d535a
	 * Get column permutation vector perm_c[], according to permc_spec:
kusano 7d535a
	 *   permc_spec = NATURAL:  natural ordering 
kusano 7d535a
	 *   permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
kusano 7d535a
	 *   permc_spec = MMD_ATA:  minimum degree on structure of A'*A
kusano 7d535a
	 *   permc_spec = COLAMD:   approximate minimum degree column ordering
kusano 7d535a
	 *   permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
kusano 7d535a
	 */
kusano 7d535a
	permc_spec = options->ColPerm;
kusano 7d535a
	if ( permc_spec != MY_PERMC && options->Fact == DOFACT )
kusano 7d535a
	    get_perm_c(permc_spec, AA, perm_c);
kusano 7d535a
	utime[COLPERM] = SuperLU_timer_() - t0;
kusano 7d535a
kusano 7d535a
	t0 = SuperLU_timer_();
kusano 7d535a
	sp_preorder(options, AA, perm_c, etree, &AC);
kusano 7d535a
	utime[ETREE] = SuperLU_timer_() - t0;
kusano 7d535a
kusano 7d535a
	/* Compute the LU factorization of A*Pc. */
kusano 7d535a
	t0 = SuperLU_timer_();
kusano 7d535a
	sgsitrf(options, &AC, relax, panel_size, etree, work, lwork,
kusano 7d535a
                perm_c, perm_r, L, U, stat, info);
kusano 7d535a
	utime[FACT] = SuperLU_timer_() - t0;
kusano 7d535a
kusano 7d535a
	if ( lwork == -1 ) {
kusano 7d535a
	    mem_usage->total_needed = *info - A->ncol;
kusano 7d535a
	    return;
kusano 7d535a
	}
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    if ( options->PivotGrowth ) {
kusano 7d535a
	if ( *info > 0 ) return;
kusano 7d535a
kusano 7d535a
	/* Compute the reciprocal pivot growth factor *recip_pivot_growth. */
kusano 7d535a
	*recip_pivot_growth = sPivotGrowth(A->ncol, AA, perm_c, L, U);
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    if ( options->ConditionNumber ) {
kusano 7d535a
	/* Estimate the reciprocal of the condition number of A. */
kusano 7d535a
	t0 = SuperLU_timer_();
kusano 7d535a
	if ( notran ) {
kusano 7d535a
	    *(unsigned char *)norm = '1';
kusano 7d535a
	} else {
kusano 7d535a
	    *(unsigned char *)norm = 'I';
kusano 7d535a
	}
kusano 7d535a
	anorm = slangs(norm, AA);
kusano 7d535a
	sgscon(norm, L, U, anorm, rcond, stat, &info1);
kusano 7d535a
	utime[RCOND] = SuperLU_timer_() - t0;
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    if ( nrhs > 0 ) { /* Solve the system */
kusano 7d535a
        float *tmp, *rhs_work;
kusano 7d535a
        int n = A->nrow;
kusano 7d535a
        if ( mc64 ) {
kusano 7d535a
	    if ((tmp = floatMalloc(n)) == NULL)
kusano 7d535a
		ABORT("SUPERLU_MALLOC fails for tmp[]");
kusano 7d535a
        }
kusano 7d535a
kusano 7d535a
	/* Scale and permute the right-hand side if equilibration
kusano 7d535a
           and permutation from MC64 were performed. */
kusano 7d535a
	if ( notran ) {
kusano 7d535a
	    if ( rowequ ) {
kusano 7d535a
		for (j = 0; j < nrhs; ++j)
kusano 7d535a
		    for (i = 0; i < n; ++i)
kusano 7d535a
		        Bmat[i + j*ldb] *= R[i];
kusano 7d535a
	    }
kusano 7d535a
	    if ( mc64 ) {
kusano 7d535a
		for (j = 0; j < nrhs; ++j) {
kusano 7d535a
                   rhs_work = &Bmat[j*ldb];
kusano 7d535a
  	           for (i = 0; i < n; i++) tmp[perm[i]] = rhs_work[i];
kusano 7d535a
	           for (i = 0; i < n; i++) rhs_work[i] = tmp[i];
kusano 7d535a
                }
kusano 7d535a
	    }
kusano 7d535a
	} else if ( colequ ) {
kusano 7d535a
	    for (j = 0; j < nrhs; ++j)
kusano 7d535a
		for (i = 0; i < n; ++i) {
kusano 7d535a
	            Bmat[i + j*ldb] *= C[i];
kusano 7d535a
		}
kusano 7d535a
	}
kusano 7d535a
kusano 7d535a
	/* Compute the solution matrix X. */
kusano 7d535a
	for (j = 0; j < nrhs; j++)  /* Save a copy of the right hand sides */
kusano 7d535a
	    for (i = 0; i < B->nrow; i++)
kusano 7d535a
		Xmat[i + j*ldx] = Bmat[i + j*ldb];
kusano 7d535a
kusano 7d535a
	t0 = SuperLU_timer_();
kusano 7d535a
	sgstrs (trant, L, U, perm_c, perm_r, X, stat, &info1);
kusano 7d535a
	utime[SOLVE] = SuperLU_timer_() - t0;
kusano 7d535a
kusano 7d535a
	/* Transform the solution matrix X to a solution of the original
kusano 7d535a
	   system. */
kusano 7d535a
	if ( notran ) {
kusano 7d535a
	    if ( colequ ) {
kusano 7d535a
		for (j = 0; j < nrhs; ++j)
kusano 7d535a
		    for (i = 0; i < n; ++i) {
kusano 7d535a
                        Xmat[i + j*ldx] *= C[i];
kusano 7d535a
                    }
kusano 7d535a
	    }
kusano 7d535a
	} else { /* transposed system */
kusano 7d535a
	    if ( rowequ ) {
kusano 7d535a
		if ( mc64 ) {
kusano 7d535a
		    for (j = 0; j < nrhs; j++) {
kusano 7d535a
			for (i = 0; i < n; i++)
kusano 7d535a
			    tmp[i] = Xmat[i + j * ldx]; /*dcopy*/
kusano 7d535a
			for (i = 0; i < n; i++)
kusano 7d535a
			    Xmat[i + j * ldx] = R[i] * tmp[perm[i]];
kusano 7d535a
		    }
kusano 7d535a
		} else {
kusano 7d535a
		    for (j = 0; j < nrhs; ++j)
kusano 7d535a
			for (i = 0; i < A->nrow; ++i) {
kusano 7d535a
              	            Xmat[i + j*ldx] *= R[i];
kusano 7d535a
                        }
kusano 7d535a
		}
kusano 7d535a
	    }
kusano 7d535a
	}
kusano 7d535a
kusano 7d535a
        if ( mc64 ) SUPERLU_FREE(tmp);
kusano 7d535a
kusano 7d535a
    } /* end if nrhs > 0 */
kusano 7d535a
kusano 7d535a
    if ( options->ConditionNumber ) {
kusano 7d535a
	/* Set INFO = A->ncol+1 if the matrix is singular to working precision. */
kusano 7d535a
	if ( *rcond < slamch_("E") && *info == 0) *info = A->ncol + 1;
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    if (perm) SUPERLU_FREE(perm);
kusano 7d535a
kusano 7d535a
    if ( nofact ) {
kusano 7d535a
	ilu_sQuerySpace(L, U, mem_usage);
kusano 7d535a
	Destroy_CompCol_Permuted(&AC);
kusano 7d535a
    }
kusano 7d535a
    if ( A->Stype == SLU_NR ) {
kusano 7d535a
	Destroy_SuperMatrix_Store(AA);
kusano 7d535a
	SUPERLU_FREE(AA);
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
}