kusano 7d535a
kusano 7d535a
/*! @file sgssvx.c
kusano 7d535a
 * \brief Solves the system of linear equations A*X=B or A'*X=B
kusano 7d535a
 *
kusano 7d535a
 * 
kusano 7d535a
 * -- SuperLU routine (version 3.0) --
kusano 7d535a
 * Univ. of California Berkeley, Xerox Palo Alto Research Center,
kusano 7d535a
 * and Lawrence Berkeley National Lab.
kusano 7d535a
 * October 15, 2003
kusano 7d535a
 * 
kusano 7d535a
 */
kusano 7d535a
#include "slu_sdefs.h"
kusano 7d535a
kusano 7d535a
/*! \brief
kusano 7d535a
 *
kusano 7d535a
 * 
kusano 7d535a
 * Purpose
kusano 7d535a
 * =======
kusano 7d535a
 *
kusano 7d535a
 * SGSSVX solves the system of linear equations A*X=B or A'*X=B, using
kusano 7d535a
 * the LU factorization from sgstrf(). Error bounds on the solution and
kusano 7d535a
 * a condition estimate are also provided. It performs the following steps:
kusano 7d535a
 *
kusano 7d535a
 *   1. If A is stored column-wise (A->Stype = SLU_NC):
kusano 7d535a
 *  
kusano 7d535a
 *      1.1. If options->Equil = YES, scaling factors are computed to
kusano 7d535a
 *           equilibrate the system:
kusano 7d535a
 *           options->Trans = NOTRANS:
kusano 7d535a
 *               diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
kusano 7d535a
 *           options->Trans = TRANS:
kusano 7d535a
 *               (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
kusano 7d535a
 *           options->Trans = CONJ:
kusano 7d535a
 *               (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
kusano 7d535a
 *           Whether or not the system will be equilibrated depends on the
kusano 7d535a
 *           scaling of the matrix A, but if equilibration is used, A is
kusano 7d535a
 *           overwritten by diag(R)*A*diag(C) and B by diag(R)*B
kusano 7d535a
 *           (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans
kusano 7d535a
 *           = TRANS or CONJ).
kusano 7d535a
 *
kusano 7d535a
 *      1.2. Permute columns of A, forming A*Pc, where Pc is a permutation
kusano 7d535a
 *           matrix that usually preserves sparsity.
kusano 7d535a
 *           For more details of this step, see sp_preorder.c.
kusano 7d535a
 *
kusano 7d535a
 *      1.3. If options->Fact != FACTORED, the LU decomposition is used to
kusano 7d535a
 *           factor the matrix A (after equilibration if options->Equil = YES)
kusano 7d535a
 *           as Pr*A*Pc = L*U, with Pr determined by partial pivoting.
kusano 7d535a
 *
kusano 7d535a
 *      1.4. Compute the reciprocal pivot growth factor.
kusano 7d535a
 *
kusano 7d535a
 *      1.5. If some U(i,i) = 0, so that U is exactly singular, then the
kusano 7d535a
 *           routine returns with info = i. Otherwise, the factored form of 
kusano 7d535a
 *           A is used to estimate the condition number of the matrix A. If
kusano 7d535a
 *           the reciprocal of the condition number is less than machine
kusano 7d535a
 *           precision, info = A->ncol+1 is returned as a warning, but the
kusano 7d535a
 *           routine still goes on to solve for X and computes error bounds
kusano 7d535a
 *           as described below.
kusano 7d535a
 *
kusano 7d535a
 *      1.6. The system of equations is solved for X using the factored form
kusano 7d535a
 *           of A.
kusano 7d535a
 *
kusano 7d535a
 *      1.7. If options->IterRefine != NOREFINE, iterative refinement is
kusano 7d535a
 *           applied to improve the computed solution matrix and calculate
kusano 7d535a
 *           error bounds and backward error estimates for it.
kusano 7d535a
 *
kusano 7d535a
 *      1.8. If equilibration was used, the matrix X is premultiplied by
kusano 7d535a
 *           diag(C) (if options->Trans = NOTRANS) or diag(R)
kusano 7d535a
 *           (if options->Trans = TRANS or CONJ) so that it solves the
kusano 7d535a
 *           original system before equilibration.
kusano 7d535a
 *
kusano 7d535a
 *   2. If A is stored row-wise (A->Stype = SLU_NR), apply the above algorithm
kusano 7d535a
 *      to the transpose of A:
kusano 7d535a
 *
kusano 7d535a
 *      2.1. If options->Equil = YES, scaling factors are computed to
kusano 7d535a
 *           equilibrate the system:
kusano 7d535a
 *           options->Trans = NOTRANS:
kusano 7d535a
 *               diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
kusano 7d535a
 *           options->Trans = TRANS:
kusano 7d535a
 *               (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
kusano 7d535a
 *           options->Trans = CONJ:
kusano 7d535a
 *               (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
kusano 7d535a
 *           Whether or not the system will be equilibrated depends on the
kusano 7d535a
 *           scaling of the matrix A, but if equilibration is used, A' is
kusano 7d535a
 *           overwritten by diag(R)*A'*diag(C) and B by diag(R)*B 
kusano 7d535a
 *           (if trans='N') or diag(C)*B (if trans = 'T' or 'C').
kusano 7d535a
 *
kusano 7d535a
 *      2.2. Permute columns of transpose(A) (rows of A), 
kusano 7d535a
 *           forming transpose(A)*Pc, where Pc is a permutation matrix that 
kusano 7d535a
 *           usually preserves sparsity.
kusano 7d535a
 *           For more details of this step, see sp_preorder.c.
kusano 7d535a
 *
kusano 7d535a
 *      2.3. If options->Fact != FACTORED, the LU decomposition is used to
kusano 7d535a
 *           factor the transpose(A) (after equilibration if 
kusano 7d535a
 *           options->Equil = YES) as Pr*transpose(A)*Pc = L*U with the
kusano 7d535a
 *           permutation Pr determined by partial pivoting.
kusano 7d535a
 *
kusano 7d535a
 *      2.4. Compute the reciprocal pivot growth factor.
kusano 7d535a
 *
kusano 7d535a
 *      2.5. If some U(i,i) = 0, so that U is exactly singular, then the
kusano 7d535a
 *           routine returns with info = i. Otherwise, the factored form 
kusano 7d535a
 *           of transpose(A) is used to estimate the condition number of the
kusano 7d535a
 *           matrix A. If the reciprocal of the condition number
kusano 7d535a
 *           is less than machine precision, info = A->nrow+1 is returned as
kusano 7d535a
 *           a warning, but the routine still goes on to solve for X and
kusano 7d535a
 *           computes error bounds as described below.
kusano 7d535a
 *
kusano 7d535a
 *      2.6. The system of equations is solved for X using the factored form
kusano 7d535a
 *           of transpose(A).
kusano 7d535a
 *
kusano 7d535a
 *      2.7. If options->IterRefine != NOREFINE, iterative refinement is
kusano 7d535a
 *           applied to improve the computed solution matrix and calculate
kusano 7d535a
 *           error bounds and backward error estimates for it.
kusano 7d535a
 *
kusano 7d535a
 *      2.8. If equilibration was used, the matrix X is premultiplied by
kusano 7d535a
 *           diag(C) (if options->Trans = NOTRANS) or diag(R) 
kusano 7d535a
 *           (if options->Trans = TRANS or CONJ) so that it solves the
kusano 7d535a
 *           original system before equilibration.
kusano 7d535a
 *
kusano 7d535a
 *   See supermatrix.h for the definition of 'SuperMatrix' structure.
kusano 7d535a
 *
kusano 7d535a
 * Arguments
kusano 7d535a
 * =========
kusano 7d535a
 *
kusano 7d535a
 * options (input) superlu_options_t*
kusano 7d535a
 *         The structure defines the input parameters to control
kusano 7d535a
 *         how the LU decomposition will be performed and how the
kusano 7d535a
 *         system will be solved.
kusano 7d535a
 *
kusano 7d535a
 * A       (input/output) SuperMatrix*
kusano 7d535a
 *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
kusano 7d535a
 *         of the linear equations is A->nrow. Currently, the type of A can be:
kusano 7d535a
 *         Stype = SLU_NC or SLU_NR, Dtype = SLU_S, Mtype = SLU_GE.
kusano 7d535a
 *         In the future, more general A may be handled.
kusano 7d535a
 *
kusano 7d535a
 *         On entry, If options->Fact = FACTORED and equed is not 'N', 
kusano 7d535a
 *         then A must have been equilibrated by the scaling factors in
kusano 7d535a
 *         R and/or C.  
kusano 7d535a
 *         On exit, A is not modified if options->Equil = NO, or if 
kusano 7d535a
 *         options->Equil = YES but equed = 'N' on exit.
kusano 7d535a
 *         Otherwise, if options->Equil = YES and equed is not 'N',
kusano 7d535a
 *         A is scaled as follows:
kusano 7d535a
 *         If A->Stype = SLU_NC:
kusano 7d535a
 *           equed = 'R':  A := diag(R) * A
kusano 7d535a
 *           equed = 'C':  A := A * diag(C)
kusano 7d535a
 *           equed = 'B':  A := diag(R) * A * diag(C).
kusano 7d535a
 *         If A->Stype = SLU_NR:
kusano 7d535a
 *           equed = 'R':  transpose(A) := diag(R) * transpose(A)
kusano 7d535a
 *           equed = 'C':  transpose(A) := transpose(A) * diag(C)
kusano 7d535a
 *           equed = 'B':  transpose(A) := diag(R) * transpose(A) * diag(C).
kusano 7d535a
 *
kusano 7d535a
 * perm_c  (input/output) int*
kusano 7d535a
 *	   If A->Stype = SLU_NC, Column permutation vector of size A->ncol,
kusano 7d535a
 *         which defines the permutation matrix Pc; perm_c[i] = j means
kusano 7d535a
 *         column i of A is in position j in A*Pc.
kusano 7d535a
 *         On exit, perm_c may be overwritten by the product of the input
kusano 7d535a
 *         perm_c and a permutation that postorders the elimination tree
kusano 7d535a
 *         of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree
kusano 7d535a
 *         is already in postorder.
kusano 7d535a
 *
kusano 7d535a
 *         If A->Stype = SLU_NR, column permutation vector of size A->nrow,
kusano 7d535a
 *         which describes permutation of columns of transpose(A) 
kusano 7d535a
 *         (rows of A) as described above.
kusano 7d535a
 * 
kusano 7d535a
 * perm_r  (input/output) int*
kusano 7d535a
 *         If A->Stype = SLU_NC, row permutation vector of size A->nrow, 
kusano 7d535a
 *         which defines the permutation matrix Pr, and is determined
kusano 7d535a
 *         by partial pivoting.  perm_r[i] = j means row i of A is in 
kusano 7d535a
 *         position j in Pr*A.
kusano 7d535a
 *
kusano 7d535a
 *         If A->Stype = SLU_NR, permutation vector of size A->ncol, which
kusano 7d535a
 *         determines permutation of rows of transpose(A)
kusano 7d535a
 *         (columns of A) as described above.
kusano 7d535a
 *
kusano 7d535a
 *         If options->Fact = SamePattern_SameRowPerm, the pivoting routine
kusano 7d535a
 *         will try to use the input perm_r, unless a certain threshold
kusano 7d535a
 *         criterion is violated. In that case, perm_r is overwritten by a
kusano 7d535a
 *         new permutation determined by partial pivoting or diagonal
kusano 7d535a
 *         threshold pivoting.
kusano 7d535a
 *         Otherwise, perm_r is output argument.
kusano 7d535a
 * 
kusano 7d535a
 * etree   (input/output) int*,  dimension (A->ncol)
kusano 7d535a
 *         Elimination tree of Pc'*A'*A*Pc.
kusano 7d535a
 *         If options->Fact != FACTORED and options->Fact != DOFACT,
kusano 7d535a
 *         etree is an input argument, otherwise it is an output argument.
kusano 7d535a
 *         Note: etree is a vector of parent pointers for a forest whose
kusano 7d535a
 *         vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
kusano 7d535a
 *
kusano 7d535a
 * equed   (input/output) char*
kusano 7d535a
 *         Specifies the form of equilibration that was done.
kusano 7d535a
 *         = 'N': No equilibration.
kusano 7d535a
 *         = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
kusano 7d535a
 *         = 'C': Column equilibration, i.e., A was postmultiplied by diag(C).
kusano 7d535a
 *         = 'B': Both row and column equilibration, i.e., A was replaced 
kusano 7d535a
 *                by diag(R)*A*diag(C).
kusano 7d535a
 *         If options->Fact = FACTORED, equed is an input argument,
kusano 7d535a
 *         otherwise it is an output argument.
kusano 7d535a
 *
kusano 7d535a
 * R       (input/output) float*, dimension (A->nrow)
kusano 7d535a
 *         The row scale factors for A or transpose(A).
kusano 7d535a
 *         If equed = 'R' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
kusano 7d535a
 *             (if A->Stype = SLU_NR) is multiplied on the left by diag(R).
kusano 7d535a
 *         If equed = 'N' or 'C', R is not accessed.
kusano 7d535a
 *         If options->Fact = FACTORED, R is an input argument,
kusano 7d535a
 *             otherwise, R is output.
kusano 7d535a
 *         If options->Fact = FACTORED and equed = 'R' or 'B', each element
kusano 7d535a
 *             of R must be positive.
kusano 7d535a
 * 
kusano 7d535a
 * C       (input/output) float*, dimension (A->ncol)
kusano 7d535a
 *         The column scale factors for A or transpose(A).
kusano 7d535a
 *         If equed = 'C' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
kusano 7d535a
 *             (if A->Stype = SLU_NR) is multiplied on the right by diag(C).
kusano 7d535a
 *         If equed = 'N' or 'R', C is not accessed.
kusano 7d535a
 *         If options->Fact = FACTORED, C is an input argument,
kusano 7d535a
 *             otherwise, C is output.
kusano 7d535a
 *         If options->Fact = FACTORED and equed = 'C' or 'B', each element
kusano 7d535a
 *             of C must be positive.
kusano 7d535a
 *         
kusano 7d535a
 * L       (output) SuperMatrix*
kusano 7d535a
 *	   The factor L from the factorization
kusano 7d535a
 *             Pr*A*Pc=L*U              (if A->Stype = SLU_NC) or
kusano 7d535a
 *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
kusano 7d535a
 *         Uses compressed row subscripts storage for supernodes, i.e.,
kusano 7d535a
 *         L has types: Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
kusano 7d535a
 *
kusano 7d535a
 * U       (output) SuperMatrix*
kusano 7d535a
 *	   The factor U from the factorization
kusano 7d535a
 *             Pr*A*Pc=L*U              (if A->Stype = SLU_NC) or
kusano 7d535a
 *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
kusano 7d535a
 *         Uses column-wise storage scheme, i.e., U has types:
kusano 7d535a
 *         Stype = SLU_NC, Dtype = SLU_S, Mtype = SLU_TRU.
kusano 7d535a
 *
kusano 7d535a
 * work    (workspace/output) void*, size (lwork) (in bytes)
kusano 7d535a
 *         User supplied workspace, should be large enough
kusano 7d535a
 *         to hold data structures for factors L and U.
kusano 7d535a
 *         On exit, if options->Fact != FACTORED, L and U point to this array.
kusano 7d535a
 *
kusano 7d535a
 * lwork   (input) int
kusano 7d535a
 *         Specifies the size of work array in bytes.
kusano 7d535a
 *         = 0:  allocate space internally by system malloc;
kusano 7d535a
 *         > 0:  use user-supplied work array of length lwork in bytes,
kusano 7d535a
 *               returns error if space runs out.
kusano 7d535a
 *         = -1: the routine guesses the amount of space needed without
kusano 7d535a
 *               performing the factorization, and returns it in
kusano 7d535a
 *               mem_usage->total_needed; no other side effects.
kusano 7d535a
 *
kusano 7d535a
 *         See argument 'mem_usage' for memory usage statistics.
kusano 7d535a
 *
kusano 7d535a
 * B       (input/output) SuperMatrix*
kusano 7d535a
 *         B has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE.
kusano 7d535a
 *         On entry, the right hand side matrix.
kusano 7d535a
 *         If B->ncol = 0, only LU decomposition is performed, the triangular
kusano 7d535a
 *                         solve is skipped.
kusano 7d535a
 *         On exit,
kusano 7d535a
 *            if equed = 'N', B is not modified; otherwise
kusano 7d535a
 *            if A->Stype = SLU_NC:
kusano 7d535a
 *               if options->Trans = NOTRANS and equed = 'R' or 'B',
kusano 7d535a
 *                  B is overwritten by diag(R)*B;
kusano 7d535a
 *               if options->Trans = TRANS or CONJ and equed = 'C' or 'B',
kusano 7d535a
 *                  B is overwritten by diag(C)*B;
kusano 7d535a
 *            if A->Stype = SLU_NR:
kusano 7d535a
 *               if options->Trans = NOTRANS and equed = 'C' or 'B',
kusano 7d535a
 *                  B is overwritten by diag(C)*B;
kusano 7d535a
 *               if options->Trans = TRANS or CONJ and equed = 'R' or 'B',
kusano 7d535a
 *                  B is overwritten by diag(R)*B.
kusano 7d535a
 *
kusano 7d535a
 * X       (output) SuperMatrix*
kusano 7d535a
 *         X has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE. 
kusano 7d535a
 *         If info = 0 or info = A->ncol+1, X contains the solution matrix
kusano 7d535a
 *         to the original system of equations. Note that A and B are modified
kusano 7d535a
 *         on exit if equed is not 'N', and the solution to the equilibrated
kusano 7d535a
 *         system is inv(diag(C))*X if options->Trans = NOTRANS and
kusano 7d535a
 *         equed = 'C' or 'B', or inv(diag(R))*X if options->Trans = 'T' or 'C'
kusano 7d535a
 *         and equed = 'R' or 'B'.
kusano 7d535a
 *
kusano 7d535a
 * recip_pivot_growth (output) float*
kusano 7d535a
 *         The reciprocal pivot growth factor max_j( norm(A_j)/norm(U_j) ).
kusano 7d535a
 *         The infinity norm is used. If recip_pivot_growth is much less
kusano 7d535a
 *         than 1, the stability of the LU factorization could be poor.
kusano 7d535a
 *
kusano 7d535a
 * rcond   (output) float*
kusano 7d535a
 *         The estimate of the reciprocal condition number of the matrix A
kusano 7d535a
 *         after equilibration (if done). If rcond is less than the machine
kusano 7d535a
 *         precision (in particular, if rcond = 0), the matrix is singular
kusano 7d535a
 *         to working precision. This condition is indicated by a return
kusano 7d535a
 *         code of info > 0.
kusano 7d535a
 *
kusano 7d535a
 * FERR    (output) float*, dimension (B->ncol)   
kusano 7d535a
 *         The estimated forward error bound for each solution vector   
kusano 7d535a
 *         X(j) (the j-th column of the solution matrix X).   
kusano 7d535a
 *         If XTRUE is the true solution corresponding to X(j), FERR(j) 
kusano 7d535a
 *         is an estimated upper bound for the magnitude of the largest 
kusano 7d535a
 *         element in (X(j) - XTRUE) divided by the magnitude of the   
kusano 7d535a
 *         largest element in X(j).  The estimate is as reliable as   
kusano 7d535a
 *         the estimate for RCOND, and is almost always a slight   
kusano 7d535a
 *         overestimate of the true error.
kusano 7d535a
 *         If options->IterRefine = NOREFINE, ferr = 1.0.
kusano 7d535a
 *
kusano 7d535a
 * BERR    (output) float*, dimension (B->ncol)
kusano 7d535a
 *         The componentwise relative backward error of each solution   
kusano 7d535a
 *         vector X(j) (i.e., the smallest relative change in   
kusano 7d535a
 *         any element of A or B that makes X(j) an exact solution).
kusano 7d535a
 *         If options->IterRefine = NOREFINE, berr = 1.0.
kusano 7d535a
 *
kusano 7d535a
 * mem_usage (output) mem_usage_t*
kusano 7d535a
 *         Record the memory usage statistics, consisting of following fields:
kusano 7d535a
 *         - for_lu (float)
kusano 7d535a
 *           The amount of space used in bytes for L\U data structures.
kusano 7d535a
 *         - total_needed (float)
kusano 7d535a
 *           The amount of space needed in bytes to perform factorization.
kusano 7d535a
 *         - expansions (int)
kusano 7d535a
 *           The number of memory expansions during the LU factorization.
kusano 7d535a
 *
kusano 7d535a
 * stat   (output) SuperLUStat_t*
kusano 7d535a
 *        Record the statistics on runtime and floating-point operation count.
kusano 7d535a
 *        See slu_util.h for the definition of 'SuperLUStat_t'.
kusano 7d535a
 *
kusano 7d535a
 * info    (output) int*
kusano 7d535a
 *         = 0: successful exit   
kusano 7d535a
 *         < 0: if info = -i, the i-th argument had an illegal value   
kusano 7d535a
 *         > 0: if info = i, and i is   
kusano 7d535a
 *              <= A->ncol: U(i,i) is exactly zero. The factorization has   
kusano 7d535a
 *                    been completed, but the factor U is exactly   
kusano 7d535a
 *                    singular, so the solution and error bounds   
kusano 7d535a
 *                    could not be computed.   
kusano 7d535a
 *              = A->ncol+1: U is nonsingular, but RCOND is less than machine
kusano 7d535a
 *                    precision, meaning that the matrix is singular to
kusano 7d535a
 *                    working precision. Nevertheless, the solution and
kusano 7d535a
 *                    error bounds are computed because there are a number
kusano 7d535a
 *                    of situations where the computed solution can be more
kusano 7d535a
 *                    accurate than the value of RCOND would suggest.   
kusano 7d535a
 *              > A->ncol+1: number of bytes allocated when memory allocation
kusano 7d535a
 *                    failure occurred, plus A->ncol.
kusano 7d535a
 * 
kusano 7d535a
 */
kusano 7d535a
kusano 7d535a
void
kusano 7d535a
sgssvx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
kusano 7d535a
       int *etree, char *equed, float *R, float *C,
kusano 7d535a
       SuperMatrix *L, SuperMatrix *U, void *work, int lwork,
kusano 7d535a
       SuperMatrix *B, SuperMatrix *X, float *recip_pivot_growth, 
kusano 7d535a
       float *rcond, float *ferr, float *berr, 
kusano 7d535a
       mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info )
kusano 7d535a
{
kusano 7d535a
kusano 7d535a
kusano 7d535a
    DNformat  *Bstore, *Xstore;
kusano 7d535a
    float    *Bmat, *Xmat;
kusano 7d535a
    int       ldb, ldx, nrhs;
kusano 7d535a
    SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/
kusano 7d535a
    SuperMatrix AC; /* Matrix postmultiplied by Pc */
kusano 7d535a
    int       colequ, equil, nofact, notran, rowequ, permc_spec;
kusano 7d535a
    trans_t   trant;
kusano 7d535a
    char      norm[1];
kusano 7d535a
    int       i, j, info1;
kusano 7d535a
    float    amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin;
kusano 7d535a
    int       relax, panel_size;
kusano 7d535a
    float    diag_pivot_thresh;
kusano 7d535a
    double    t0;      /* temporary time */
kusano 7d535a
    double    *utime;
kusano 7d535a
kusano 7d535a
    /* External functions */
kusano 7d535a
    extern float slangs(char *, SuperMatrix *);
kusano 7d535a
kusano 7d535a
    Bstore = B->Store;
kusano 7d535a
    Xstore = X->Store;
kusano 7d535a
    Bmat   = Bstore->nzval;
kusano 7d535a
    Xmat   = Xstore->nzval;
kusano 7d535a
    ldb    = Bstore->lda;
kusano 7d535a
    ldx    = Xstore->lda;
kusano 7d535a
    nrhs   = B->ncol;
kusano 7d535a
kusano 7d535a
    *info = 0;
kusano 7d535a
    nofact = (options->Fact != FACTORED);
kusano 7d535a
    equil = (options->Equil == YES);
kusano 7d535a
    notran = (options->Trans == NOTRANS);
kusano 7d535a
    if ( nofact ) {
kusano 7d535a
	*(unsigned char *)equed = 'N';
kusano 7d535a
	rowequ = FALSE;
kusano 7d535a
	colequ = FALSE;
kusano 7d535a
    } else {
kusano 7d535a
	rowequ = lsame_(equed, "R") || lsame_(equed, "B");
kusano 7d535a
	colequ = lsame_(equed, "C") || lsame_(equed, "B");
kusano 7d535a
	smlnum = slamch_("Safe minimum");
kusano 7d535a
	bignum = 1. / smlnum;
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
#if 0
kusano 7d535a
printf("dgssvx: Fact=%4d, Trans=%4d, equed=%c\n",
kusano 7d535a
       options->Fact, options->Trans, *equed);
kusano 7d535a
#endif
kusano 7d535a
kusano 7d535a
    /* Test the input parameters */
kusano 7d535a
    if (!nofact && options->Fact != DOFACT && options->Fact != SamePattern &&
kusano 7d535a
	options->Fact != SamePattern_SameRowPerm &&
kusano 7d535a
	!notran && options->Trans != TRANS && options->Trans != CONJ &&
kusano 7d535a
	!equil && options->Equil != NO)
kusano 7d535a
	*info = -1;
kusano 7d535a
    else if ( A->nrow != A->ncol || A->nrow < 0 ||
kusano 7d535a
	      (A->Stype != SLU_NC && A->Stype != SLU_NR) ||
kusano 7d535a
	      A->Dtype != SLU_S || A->Mtype != SLU_GE )
kusano 7d535a
	*info = -2;
kusano 7d535a
    else if (options->Fact == FACTORED &&
kusano 7d535a
	     !(rowequ || colequ || lsame_(equed, "N")))
kusano 7d535a
	*info = -6;
kusano 7d535a
    else {
kusano 7d535a
	if (rowequ) {
kusano 7d535a
	    rcmin = bignum;
kusano 7d535a
	    rcmax = 0.;
kusano 7d535a
	    for (j = 0; j < A->nrow; ++j) {
kusano 7d535a
		rcmin = SUPERLU_MIN(rcmin, R[j]);
kusano 7d535a
		rcmax = SUPERLU_MAX(rcmax, R[j]);
kusano 7d535a
	    }
kusano 7d535a
	    if (rcmin <= 0.) *info = -7;
kusano 7d535a
	    else if ( A->nrow > 0)
kusano 7d535a
		rowcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum);
kusano 7d535a
	    else rowcnd = 1.;
kusano 7d535a
	}
kusano 7d535a
	if (colequ && *info == 0) {
kusano 7d535a
	    rcmin = bignum;
kusano 7d535a
	    rcmax = 0.;
kusano 7d535a
	    for (j = 0; j < A->nrow; ++j) {
kusano 7d535a
		rcmin = SUPERLU_MIN(rcmin, C[j]);
kusano 7d535a
		rcmax = SUPERLU_MAX(rcmax, C[j]);
kusano 7d535a
	    }
kusano 7d535a
	    if (rcmin <= 0.) *info = -8;
kusano 7d535a
	    else if (A->nrow > 0)
kusano 7d535a
		colcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum);
kusano 7d535a
	    else colcnd = 1.;
kusano 7d535a
	}
kusano 7d535a
	if (*info == 0) {
kusano 7d535a
	    if ( lwork < -1 ) *info = -12;
kusano 7d535a
	    else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
kusano 7d535a
		      B->Stype != SLU_DN || B->Dtype != SLU_S || 
kusano 7d535a
		      B->Mtype != SLU_GE )
kusano 7d535a
		*info = -13;
kusano 7d535a
	    else if ( X->ncol < 0 || Xstore->lda < SUPERLU_MAX(0, A->nrow) ||
kusano 7d535a
		      (B->ncol != 0 && B->ncol != X->ncol) ||
kusano 7d535a
                      X->Stype != SLU_DN ||
kusano 7d535a
		      X->Dtype != SLU_S || X->Mtype != SLU_GE )
kusano 7d535a
		*info = -14;
kusano 7d535a
	}
kusano 7d535a
    }
kusano 7d535a
    if (*info != 0) {
kusano 7d535a
	i = -(*info);
kusano 7d535a
	xerbla_("sgssvx", &i);
kusano 7d535a
	return;
kusano 7d535a
    }
kusano 7d535a
    
kusano 7d535a
    /* Initialization for factor parameters */
kusano 7d535a
    panel_size = sp_ienv(1);
kusano 7d535a
    relax      = sp_ienv(2);
kusano 7d535a
    diag_pivot_thresh = options->DiagPivotThresh;
kusano 7d535a
kusano 7d535a
    utime = stat->utime;
kusano 7d535a
    
kusano 7d535a
    /* Convert A to SLU_NC format when necessary. */
kusano 7d535a
    if ( A->Stype == SLU_NR ) {
kusano 7d535a
	NRformat *Astore = A->Store;
kusano 7d535a
	AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
kusano 7d535a
	sCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, 
kusano 7d535a
			       Astore->nzval, Astore->colind, Astore->rowptr,
kusano 7d535a
			       SLU_NC, A->Dtype, A->Mtype);
kusano 7d535a
	if ( notran ) { /* Reverse the transpose argument. */
kusano 7d535a
	    trant = TRANS;
kusano 7d535a
	    notran = 0;
kusano 7d535a
	} else {
kusano 7d535a
	    trant = NOTRANS;
kusano 7d535a
	    notran = 1;
kusano 7d535a
	}
kusano 7d535a
    } else { /* A->Stype == SLU_NC */
kusano 7d535a
	trant = options->Trans;
kusano 7d535a
	AA = A;
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    if ( nofact && equil ) {
kusano 7d535a
	t0 = SuperLU_timer_();
kusano 7d535a
	/* Compute row and column scalings to equilibrate the matrix A. */
kusano 7d535a
	sgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1);
kusano 7d535a
	
kusano 7d535a
	if ( info1 == 0 ) {
kusano 7d535a
	    /* Equilibrate matrix A. */
kusano 7d535a
	    slaqgs(AA, R, C, rowcnd, colcnd, amax, equed);
kusano 7d535a
	    rowequ = lsame_(equed, "R") || lsame_(equed, "B");
kusano 7d535a
	    colequ = lsame_(equed, "C") || lsame_(equed, "B");
kusano 7d535a
	}
kusano 7d535a
	utime[EQUIL] = SuperLU_timer_() - t0;
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
kusano 7d535a
    if ( nofact ) {
kusano 7d535a
	
kusano 7d535a
        t0 = SuperLU_timer_();
kusano 7d535a
	/*
kusano 7d535a
	 * Gnet column permutation vector perm_c[], according to permc_spec:
kusano 7d535a
	 *   permc_spec = NATURAL:  natural ordering 
kusano 7d535a
	 *   permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
kusano 7d535a
	 *   permc_spec = MMD_ATA:  minimum degree on structure of A'*A
kusano 7d535a
	 *   permc_spec = COLAMD:   approximate minimum degree column ordering
kusano 7d535a
	 *   permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
kusano 7d535a
	 */
kusano 7d535a
	permc_spec = options->ColPerm;
kusano 7d535a
	if ( permc_spec != MY_PERMC && options->Fact == DOFACT )
kusano 7d535a
            get_perm_c(permc_spec, AA, perm_c);
kusano 7d535a
	utime[COLPERM] = SuperLU_timer_() - t0;
kusano 7d535a
kusano 7d535a
	t0 = SuperLU_timer_();
kusano 7d535a
	sp_preorder(options, AA, perm_c, etree, &AC);
kusano 7d535a
	utime[ETREE] = SuperLU_timer_() - t0;
kusano 7d535a
    
kusano 7d535a
/*	printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", 
kusano 7d535a
	       relax, panel_size, sp_ienv(3), sp_ienv(4));
kusano 7d535a
	fflush(stdout); */
kusano 7d535a
	
kusano 7d535a
	/* Compute the LU factorization of A*Pc. */
kusano 7d535a
	t0 = SuperLU_timer_();
kusano 7d535a
	sgstrf(options, &AC, relax, panel_size, etree,
kusano 7d535a
                work, lwork, perm_c, perm_r, L, U, stat, info);
kusano 7d535a
	utime[FACT] = SuperLU_timer_() - t0;
kusano 7d535a
	
kusano 7d535a
	if ( lwork == -1 ) {
kusano 7d535a
	    mem_usage->total_needed = *info - A->ncol;
kusano 7d535a
	    return;
kusano 7d535a
	}
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    if ( options->PivotGrowth ) {
kusano 7d535a
        if ( *info > 0 ) {
kusano 7d535a
	    if ( *info <= A->ncol ) {
kusano 7d535a
	        /* Compute the reciprocal pivot growth factor of the leading
kusano 7d535a
	           rank-deficient *info columns of A. */
kusano 7d535a
	        *recip_pivot_growth = sPivotGrowth(*info, AA, perm_c, L, U);
kusano 7d535a
	    }
kusano 7d535a
	    return;
kusano 7d535a
        }
kusano 7d535a
kusano 7d535a
        /* Compute the reciprocal pivot growth factor *recip_pivot_growth. */
kusano 7d535a
        *recip_pivot_growth = sPivotGrowth(A->ncol, AA, perm_c, L, U);
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    if ( options->ConditionNumber ) {
kusano 7d535a
        /* Estimate the reciprocal of the condition number of A. */
kusano 7d535a
        t0 = SuperLU_timer_();
kusano 7d535a
        if ( notran ) {
kusano 7d535a
	    *(unsigned char *)norm = '1';
kusano 7d535a
        } else {
kusano 7d535a
	    *(unsigned char *)norm = 'I';
kusano 7d535a
        }
kusano 7d535a
        anorm = slangs(norm, AA);
kusano 7d535a
        sgscon(norm, L, U, anorm, rcond, stat, info);
kusano 7d535a
        utime[RCOND] = SuperLU_timer_() - t0;
kusano 7d535a
    }
kusano 7d535a
    
kusano 7d535a
    if ( nrhs > 0 ) {
kusano 7d535a
        /* Scale the right hand side if equilibration was performed. */
kusano 7d535a
        if ( notran ) {
kusano 7d535a
	    if ( rowequ ) {
kusano 7d535a
	        for (j = 0; j < nrhs; ++j)
kusano 7d535a
		    for (i = 0; i < A->nrow; ++i)
kusano 7d535a
		        Bmat[i + j*ldb] *= R[i];
kusano 7d535a
	    }
kusano 7d535a
        } else if ( colequ ) {
kusano 7d535a
	    for (j = 0; j < nrhs; ++j)
kusano 7d535a
	        for (i = 0; i < A->nrow; ++i)
kusano 7d535a
	            Bmat[i + j*ldb] *= C[i];
kusano 7d535a
        }
kusano 7d535a
kusano 7d535a
        /* Compute the solution matrix X. */
kusano 7d535a
        for (j = 0; j < nrhs; j++)  /* Save a copy of the right hand sides */
kusano 7d535a
            for (i = 0; i < B->nrow; i++)
kusano 7d535a
	        Xmat[i + j*ldx] = Bmat[i + j*ldb];
kusano 7d535a
    
kusano 7d535a
        t0 = SuperLU_timer_();
kusano 7d535a
        sgstrs (trant, L, U, perm_c, perm_r, X, stat, info);
kusano 7d535a
        utime[SOLVE] = SuperLU_timer_() - t0;
kusano 7d535a
    
kusano 7d535a
        /* Use iterative refinement to improve the computed solution and compute
kusano 7d535a
           error bounds and backward error estimates for it. */
kusano 7d535a
        t0 = SuperLU_timer_();
kusano 7d535a
        if ( options->IterRefine != NOREFINE ) {
kusano 7d535a
            sgsrfs(trant, AA, L, U, perm_c, perm_r, equed, R, C, B,
kusano 7d535a
                   X, ferr, berr, stat, info);
kusano 7d535a
        } else {
kusano 7d535a
            for (j = 0; j < nrhs; ++j) ferr[j] = berr[j] = 1.0;
kusano 7d535a
        }
kusano 7d535a
        utime[REFINE] = SuperLU_timer_() - t0;
kusano 7d535a
kusano 7d535a
        /* Transform the solution matrix X to a solution of the original system. */
kusano 7d535a
        if ( notran ) {
kusano 7d535a
	    if ( colequ ) {
kusano 7d535a
	        for (j = 0; j < nrhs; ++j)
kusano 7d535a
		    for (i = 0; i < A->nrow; ++i)
kusano 7d535a
                        Xmat[i + j*ldx] *= C[i];
kusano 7d535a
	    }
kusano 7d535a
        } else if ( rowequ ) {
kusano 7d535a
	    for (j = 0; j < nrhs; ++j)
kusano 7d535a
	        for (i = 0; i < A->nrow; ++i)
kusano 7d535a
	            Xmat[i + j*ldx] *= R[i];
kusano 7d535a
        }
kusano 7d535a
    } /* end if nrhs > 0 */
kusano 7d535a
kusano 7d535a
    if ( options->ConditionNumber ) {
kusano 7d535a
        /* Set INFO = A->ncol+1 if the matrix is singular to working precision. */
kusano 7d535a
        if ( *rcond < slamch_("E") ) *info = A->ncol + 1;
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    if ( nofact ) {
kusano 7d535a
        sQuerySpace(L, U, mem_usage);
kusano 7d535a
        Destroy_CompCol_Permuted(&AC);
kusano 7d535a
    }
kusano 7d535a
    if ( A->Stype == SLU_NR ) {
kusano 7d535a
	Destroy_SuperMatrix_Store(AA);
kusano 7d535a
	SUPERLU_FREE(AA);
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
}