|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*
|
|
kusano |
7d535a |
* -- SuperLU routine (version 3.0) --
|
|
kusano |
7d535a |
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
|
|
kusano |
7d535a |
* and Lawrence Berkeley National Lab.
|
|
kusano |
7d535a |
* October 15, 2003
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
#include "slu_sdefs.h"
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
main(int argc, char *argv[])
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
/*
|
|
kusano |
7d535a |
* Purpose
|
|
kusano |
7d535a |
* =======
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* The driver program SLINSOLX1.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* This example illustrates how to use SGSSVX to solve systems with the same
|
|
kusano |
7d535a |
* A but different right-hand side.
|
|
kusano |
7d535a |
* In this case, we factorize A only once in the first call to DGSSVX,
|
|
kusano |
7d535a |
* and reuse the following data structures in the subsequent call to SGSSVX:
|
|
kusano |
7d535a |
* perm_c, perm_r, R, C, L, U.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
char equed[1];
|
|
kusano |
7d535a |
yes_no_t equil;
|
|
kusano |
7d535a |
trans_t trans;
|
|
kusano |
7d535a |
SuperMatrix A, L, U;
|
|
kusano |
7d535a |
SuperMatrix B, X;
|
|
kusano |
7d535a |
NCformat *Astore;
|
|
kusano |
7d535a |
NCformat *Ustore;
|
|
kusano |
7d535a |
SCformat *Lstore;
|
|
kusano |
7d535a |
float *a;
|
|
kusano |
7d535a |
int *asub, *xa;
|
|
kusano |
7d535a |
int *perm_c; /* column permutation vector */
|
|
kusano |
7d535a |
int *perm_r; /* row permutations from partial pivoting */
|
|
kusano |
7d535a |
int *etree;
|
|
kusano |
7d535a |
void *work;
|
|
kusano |
7d535a |
int info, lwork, nrhs, ldx;
|
|
kusano |
7d535a |
int i, m, n, nnz;
|
|
kusano |
7d535a |
float *rhsb, *rhsx, *xact;
|
|
kusano |
7d535a |
float *R, *C;
|
|
kusano |
7d535a |
float *ferr, *berr;
|
|
kusano |
7d535a |
float u, rpg, rcond;
|
|
kusano |
7d535a |
mem_usage_t mem_usage;
|
|
kusano |
7d535a |
superlu_options_t options;
|
|
kusano |
7d535a |
SuperLUStat_t stat;
|
|
kusano |
7d535a |
extern void parse_command_line();
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
#if ( DEBUGlevel>=1 )
|
|
kusano |
7d535a |
CHECK_MALLOC("Enter main()");
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Defaults */
|
|
kusano |
7d535a |
lwork = 0;
|
|
kusano |
7d535a |
nrhs = 1;
|
|
kusano |
7d535a |
equil = YES;
|
|
kusano |
7d535a |
u = 1.0;
|
|
kusano |
7d535a |
trans = NOTRANS;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Set the default values for options argument:
|
|
kusano |
7d535a |
options.Fact = DOFACT;
|
|
kusano |
7d535a |
options.Equil = YES;
|
|
kusano |
7d535a |
options.ColPerm = COLAMD;
|
|
kusano |
7d535a |
options.DiagPivotThresh = 1.0;
|
|
kusano |
7d535a |
options.Trans = NOTRANS;
|
|
kusano |
7d535a |
options.IterRefine = NOREFINE;
|
|
kusano |
7d535a |
options.SymmetricMode = NO;
|
|
kusano |
7d535a |
options.PivotGrowth = NO;
|
|
kusano |
7d535a |
options.ConditionNumber = NO;
|
|
kusano |
7d535a |
options.PrintStat = YES;
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
set_default_options(&options);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Can use command line input to modify the defaults. */
|
|
kusano |
7d535a |
parse_command_line(argc, argv, &lwork, &u, &equil, &trans);
|
|
kusano |
7d535a |
options.Equil = equil;
|
|
kusano |
7d535a |
options.DiagPivotThresh = u;
|
|
kusano |
7d535a |
options.Trans = trans;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( lwork > 0 ) {
|
|
kusano |
7d535a |
work = SUPERLU_MALLOC(lwork);
|
|
kusano |
7d535a |
if ( !work ) {
|
|
kusano |
7d535a |
ABORT("SLINSOLX: cannot allocate work[]");
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Read matrix A from a file in Harwell-Boeing format.*/
|
|
kusano |
7d535a |
sreadhb(&m, &n, &nnz, &a, &asub, &xa);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
sCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_S, SLU_GE);
|
|
kusano |
7d535a |
Astore = A.Store;
|
|
kusano |
7d535a |
printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( !(rhsb = floatMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb[].");
|
|
kusano |
7d535a |
if ( !(rhsx = floatMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsx[].");
|
|
kusano |
7d535a |
sCreate_Dense_Matrix(&B, m, nrhs, rhsb, m, SLU_DN, SLU_S, SLU_GE);
|
|
kusano |
7d535a |
sCreate_Dense_Matrix(&X, m, nrhs, rhsx, m, SLU_DN, SLU_S, SLU_GE);
|
|
kusano |
7d535a |
xact = floatMalloc(n * nrhs);
|
|
kusano |
7d535a |
ldx = n;
|
|
kusano |
7d535a |
sGenXtrue(n, nrhs, xact, ldx);
|
|
kusano |
7d535a |
sFillRHS(trans, nrhs, xact, ldx, &A, &B);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( !(etree = intMalloc(n)) ) ABORT("Malloc fails for etree[].");
|
|
kusano |
7d535a |
if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[].");
|
|
kusano |
7d535a |
if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[].");
|
|
kusano |
7d535a |
if ( !(R = (float *) SUPERLU_MALLOC(A.nrow * sizeof(float))) )
|
|
kusano |
7d535a |
ABORT("SUPERLU_MALLOC fails for R[].");
|
|
kusano |
7d535a |
if ( !(C = (float *) SUPERLU_MALLOC(A.ncol * sizeof(float))) )
|
|
kusano |
7d535a |
ABORT("SUPERLU_MALLOC fails for C[].");
|
|
kusano |
7d535a |
if ( !(ferr = (float *) SUPERLU_MALLOC(nrhs * sizeof(float))) )
|
|
kusano |
7d535a |
ABORT("SUPERLU_MALLOC fails for ferr[].");
|
|
kusano |
7d535a |
if ( !(berr = (float *) SUPERLU_MALLOC(nrhs * sizeof(float))) )
|
|
kusano |
7d535a |
ABORT("SUPERLU_MALLOC fails for berr[].");
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Initialize the statistics variables. */
|
|
kusano |
7d535a |
StatInit(&stat);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* ONLY PERFORM THE LU DECOMPOSITION */
|
|
kusano |
7d535a |
B.ncol = 0; /* Indicate not to solve the system */
|
|
kusano |
7d535a |
sgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C,
|
|
kusano |
7d535a |
&L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr,
|
|
kusano |
7d535a |
&mem_usage, &stat, &info);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
printf("LU factorization: sgssvx() returns info %d\n", info);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( info == 0 || info == n+1 ) {
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( options.PivotGrowth ) printf("Recip. pivot growth = %e\n", rpg);
|
|
kusano |
7d535a |
if ( options.ConditionNumber )
|
|
kusano |
7d535a |
printf("Recip. condition number = %e\n", rcond);
|
|
kusano |
7d535a |
Lstore = (SCformat *) L.Store;
|
|
kusano |
7d535a |
Ustore = (NCformat *) U.Store;
|
|
kusano |
7d535a |
printf("No of nonzeros in factor L = %d\n", Lstore->nnz);
|
|
kusano |
7d535a |
printf("No of nonzeros in factor U = %d\n", Ustore->nnz);
|
|
kusano |
7d535a |
printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n);
|
|
kusano |
7d535a |
printf("FILL ratio = %.1f\n", (float)(Lstore->nnz + Ustore->nnz - n)/nnz);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
printf("L\\U MB %.3f\ttotal MB needed %.3f\n",
|
|
kusano |
7d535a |
mem_usage.for_lu/1e6, mem_usage.total_needed/1e6);
|
|
kusano |
7d535a |
fflush(stdout);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
} else if ( info > 0 && lwork == -1 ) {
|
|
kusano |
7d535a |
printf("** Estimated memory: %d bytes\n", info - n);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( options.PrintStat ) StatPrint(&stat);
|
|
kusano |
7d535a |
StatFree(&stat);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* ------------------------------------------------------------
|
|
kusano |
7d535a |
NOW WE SOLVE THE LINEAR SYSTEM USING THE FACTORED FORM OF A.
|
|
kusano |
7d535a |
------------------------------------------------------------*/
|
|
kusano |
7d535a |
options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */
|
|
kusano |
7d535a |
B.ncol = nrhs; /* Set the number of right-hand side */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Initialize the statistics variables. */
|
|
kusano |
7d535a |
StatInit(&stat);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
sgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C,
|
|
kusano |
7d535a |
&L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr,
|
|
kusano |
7d535a |
&mem_usage, &stat, &info);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
printf("Triangular solve: sgssvx() returns info %d\n", info);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( info == 0 || info == n+1 ) {
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* This is how you could access the solution matrix. */
|
|
kusano |
7d535a |
float *sol = (float*) ((DNformat*) X.Store)->nzval;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( options.IterRefine ) {
|
|
kusano |
7d535a |
printf("Iterative Refinement:\n");
|
|
kusano |
7d535a |
printf("%8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR");
|
|
kusano |
7d535a |
for (i = 0; i < nrhs; ++i)
|
|
kusano |
7d535a |
printf("%8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr[i], berr[i]);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
fflush(stdout);
|
|
kusano |
7d535a |
} else if ( info > 0 && lwork == -1 ) {
|
|
kusano |
7d535a |
printf("** Estimated memory: %d bytes\n", info - n);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( options.PrintStat ) StatPrint(&stat);
|
|
kusano |
7d535a |
StatFree(&stat);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
SUPERLU_FREE (rhsb);
|
|
kusano |
7d535a |
SUPERLU_FREE (rhsx);
|
|
kusano |
7d535a |
SUPERLU_FREE (xact);
|
|
kusano |
7d535a |
SUPERLU_FREE (etree);
|
|
kusano |
7d535a |
SUPERLU_FREE (perm_r);
|
|
kusano |
7d535a |
SUPERLU_FREE (perm_c);
|
|
kusano |
7d535a |
SUPERLU_FREE (R);
|
|
kusano |
7d535a |
SUPERLU_FREE (C);
|
|
kusano |
7d535a |
SUPERLU_FREE (ferr);
|
|
kusano |
7d535a |
SUPERLU_FREE (berr);
|
|
kusano |
7d535a |
Destroy_CompCol_Matrix(&A);
|
|
kusano |
7d535a |
Destroy_SuperMatrix_Store(&B);
|
|
kusano |
7d535a |
Destroy_SuperMatrix_Store(&X);
|
|
kusano |
7d535a |
if ( lwork == 0 ) {
|
|
kusano |
7d535a |
Destroy_SuperNode_Matrix(&L);
|
|
kusano |
7d535a |
Destroy_CompCol_Matrix(&U);
|
|
kusano |
7d535a |
} else if ( lwork > 0 ) {
|
|
kusano |
7d535a |
SUPERLU_FREE(work);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
#if ( DEBUGlevel>=1 )
|
|
kusano |
7d535a |
CHECK_MALLOC("Exit main()");
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*
|
|
kusano |
7d535a |
* Parse command line options to get relaxed snode size, panel size, etc.
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
void
|
|
kusano |
7d535a |
parse_command_line(int argc, char *argv[], int *lwork,
|
|
kusano |
7d535a |
float *u, yes_no_t *equil, trans_t *trans )
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
int c;
|
|
kusano |
7d535a |
extern char *optarg;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
while ( (c = getopt(argc, argv, "hl:u:e:t:")) != EOF ) {
|
|
kusano |
7d535a |
switch (c) {
|
|
kusano |
7d535a |
case 'h':
|
|
kusano |
7d535a |
printf("Options:\n");
|
|
kusano |
7d535a |
printf("\t-l <int> - length of work[*] array\n");</int>
|
|
kusano |
7d535a |
printf("\t-u <int> - pivoting threshold\n");</int>
|
|
kusano |
7d535a |
printf("\t-e <0 or 1> - equilibrate or not\n");
|
|
kusano |
7d535a |
printf("\t-t <0 or 1> - solve transposed system or not\n");
|
|
kusano |
7d535a |
exit(1);
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case 'l': *lwork = atoi(optarg);
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case 'u': *u = atof(optarg);
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case 'e': *equil = atoi(optarg);
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case 't': *trans = atoi(optarg);
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|