|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*
|
|
kusano |
7d535a |
* -- SuperLU routine (version 3.0) --
|
|
kusano |
7d535a |
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
|
|
kusano |
7d535a |
* and Lawrence Berkeley National Lab.
|
|
kusano |
7d535a |
* October 15, 2003
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
#include "slu_ddefs.h"
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
main(int argc, char *argv[])
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
/*
|
|
kusano |
7d535a |
* Purpose
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* The driver program DLINSOLX2.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* This example illustrates how to use DGSSVX to solve systems repeatedly
|
|
kusano |
7d535a |
* with the same sparsity pattern of matrix A.
|
|
kusano |
7d535a |
* In this case, the column permutation vector perm_c is computed once.
|
|
kusano |
7d535a |
* The following data structures will be reused in the subsequent call to
|
|
kusano |
7d535a |
* DGSSVX: perm_c, etree
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
char equed[1];
|
|
kusano |
7d535a |
yes_no_t equil;
|
|
kusano |
7d535a |
trans_t trans;
|
|
kusano |
7d535a |
SuperMatrix A, A1, L, U;
|
|
kusano |
7d535a |
SuperMatrix B, B1, X;
|
|
kusano |
7d535a |
NCformat *Astore;
|
|
kusano |
7d535a |
NCformat *Ustore;
|
|
kusano |
7d535a |
SCformat *Lstore;
|
|
kusano |
7d535a |
double *a, *a1;
|
|
kusano |
7d535a |
int *asub, *xa, *asub1, *xa1;
|
|
kusano |
7d535a |
int *perm_r;
|
|
kusano |
7d535a |
int *perm_c;
|
|
kusano |
7d535a |
int *etree;
|
|
kusano |
7d535a |
void *work;
|
|
kusano |
7d535a |
int info, lwork, nrhs, ldx;
|
|
kusano |
7d535a |
int i, j, m, n, nnz;
|
|
kusano |
7d535a |
double *rhsb, *rhsb1, *rhsx, *xact;
|
|
kusano |
7d535a |
double *R, *C;
|
|
kusano |
7d535a |
double *ferr, *berr;
|
|
kusano |
7d535a |
double u, rpg, rcond;
|
|
kusano |
7d535a |
mem_usage_t mem_usage;
|
|
kusano |
7d535a |
superlu_options_t options;
|
|
kusano |
7d535a |
SuperLUStat_t stat;
|
|
kusano |
7d535a |
extern void parse_command_line();
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
#if ( DEBUGlevel>=1 )
|
|
kusano |
7d535a |
CHECK_MALLOC("Enter main()");
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
lwork = 0;
|
|
kusano |
7d535a |
nrhs = 1;
|
|
kusano |
7d535a |
equil = YES;
|
|
kusano |
7d535a |
u = 1.0;
|
|
kusano |
7d535a |
trans = NOTRANS;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
options.Fact = DOFACT;
|
|
kusano |
7d535a |
options.Equil = YES;
|
|
kusano |
7d535a |
options.ColPerm = COLAMD;
|
|
kusano |
7d535a |
options.DiagPivotThresh = 1.0;
|
|
kusano |
7d535a |
options.Trans = NOTRANS;
|
|
kusano |
7d535a |
options.IterRefine = NOREFINE;
|
|
kusano |
7d535a |
options.SymmetricMode = NO;
|
|
kusano |
7d535a |
options.PivotGrowth = NO;
|
|
kusano |
7d535a |
options.ConditionNumber = NO;
|
|
kusano |
7d535a |
options.PrintStat = YES;
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
set_default_options(&options);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
parse_command_line(argc, argv, &lwork, &u, &equil, &trans);
|
|
kusano |
7d535a |
options.Equil = equil;
|
|
kusano |
7d535a |
options.DiagPivotThresh = u;
|
|
kusano |
7d535a |
options.Trans = trans;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( lwork > 0 ) {
|
|
kusano |
7d535a |
work = SUPERLU_MALLOC(lwork);
|
|
kusano |
7d535a |
if ( !work ) {
|
|
kusano |
7d535a |
ABORT("DLINSOLX: cannot allocate work[]");
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
dreadhb(&m, &n, &nnz, &a, &asub, &xa);
|
|
kusano |
7d535a |
if ( !(a1 = doubleMalloc(nnz)) ) ABORT("Malloc fails for a1[].");
|
|
kusano |
7d535a |
if ( !(asub1 = intMalloc(nnz)) ) ABORT("Malloc fails for asub1[].");
|
|
kusano |
7d535a |
if ( !(xa1 = intMalloc(n+1)) ) ABORT("Malloc fails for xa1[].");
|
|
kusano |
7d535a |
for (i = 0; i < nnz; ++i) {
|
|
kusano |
7d535a |
a1[i] = a[i];
|
|
kusano |
7d535a |
asub1[i] = asub[i];
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
for (i = 0; i < n+1; ++i) xa1[i] = xa[i];
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
dCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_D, SLU_GE);
|
|
kusano |
7d535a |
Astore = A.Store;
|
|
kusano |
7d535a |
printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( !(rhsb = doubleMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb[].");
|
|
kusano |
7d535a |
if ( !(rhsb1 = doubleMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb1[].");
|
|
kusano |
7d535a |
if ( !(rhsx = doubleMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsx[].");
|
|
kusano |
7d535a |
dCreate_Dense_Matrix(&B, m, nrhs, rhsb, m, SLU_DN, SLU_D, SLU_GE);
|
|
kusano |
7d535a |
dCreate_Dense_Matrix(&X, m, nrhs, rhsx, m, SLU_DN, SLU_D, SLU_GE);
|
|
kusano |
7d535a |
xact = doubleMalloc(n * nrhs);
|
|
kusano |
7d535a |
ldx = n;
|
|
kusano |
7d535a |
dGenXtrue(n, nrhs, xact, ldx);
|
|
kusano |
7d535a |
dFillRHS(trans, nrhs, xact, ldx, &A, &B);
|
|
kusano |
7d535a |
for (j = 0; j < nrhs; ++j)
|
|
kusano |
7d535a |
for (i = 0; i < m; ++i) rhsb1[i+j*m] = rhsb[i+j*m];
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[].");
|
|
kusano |
7d535a |
if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[].");
|
|
kusano |
7d535a |
if ( !(etree = intMalloc(n)) ) ABORT("Malloc fails for etree[].");
|
|
kusano |
7d535a |
if ( !(R = (double *) SUPERLU_MALLOC(A.nrow * sizeof(double))) )
|
|
kusano |
7d535a |
ABORT("SUPERLU_MALLOC fails for R[].");
|
|
kusano |
7d535a |
if ( !(C = (double *) SUPERLU_MALLOC(A.ncol * sizeof(double))) )
|
|
kusano |
7d535a |
ABORT("SUPERLU_MALLOC fails for C[].");
|
|
kusano |
7d535a |
if ( !(ferr = (double *) SUPERLU_MALLOC(nrhs * sizeof(double))) )
|
|
kusano |
7d535a |
ABORT("SUPERLU_MALLOC fails for ferr[].");
|
|
kusano |
7d535a |
if ( !(berr = (double *) SUPERLU_MALLOC(nrhs * sizeof(double))) )
|
|
kusano |
7d535a |
ABORT("SUPERLU_MALLOC fails for berr[].");
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
StatInit(&stat);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*
|
|
kusano |
7d535a |
WE SOLVE THE LINEAR SYSTEM FOR THE FIRST TIME: AX = B
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
dgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C,
|
|
kusano |
7d535a |
&L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr,
|
|
kusano |
7d535a |
&mem_usage, &stat, &info);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
printf("First system: dgssvx() returns info %d\n", info);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( info == 0 || info == n+1 ) {
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
double *sol = (double*) ((DNformat*) X.Store)->nzval;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( options.PivotGrowth ) printf("Recip. pivot growth = %e\n", rpg);
|
|
kusano |
7d535a |
if ( options.ConditionNumber )
|
|
kusano |
7d535a |
printf("Recip. condition number = %e\n", rcond);
|
|
kusano |
7d535a |
Lstore = (SCformat *) L.Store;
|
|
kusano |
7d535a |
Ustore = (NCformat *) U.Store;
|
|
kusano |
7d535a |
printf("No of nonzeros in factor L = %d\n", Lstore->nnz);
|
|
kusano |
7d535a |
printf("No of nonzeros in factor U = %d\n", Ustore->nnz);
|
|
kusano |
7d535a |
printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n);
|
|
kusano |
7d535a |
printf("FILL ratio = %.1f\n", (float)(Lstore->nnz + Ustore->nnz - n)/nnz);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
printf("L\\U MB %.3f\ttotal MB needed %.3f\n",
|
|
kusano |
7d535a |
mem_usage.for_lu/1e6, mem_usage.total_needed/1e6);
|
|
kusano |
7d535a |
if ( options.IterRefine ) {
|
|
kusano |
7d535a |
printf("Iterative Refinement:\n");
|
|
kusano |
7d535a |
printf("%8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR");
|
|
kusano |
7d535a |
for (i = 0; i < nrhs; ++i)
|
|
kusano |
7d535a |
printf("%8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr[i], berr[i]);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
fflush(stdout);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
} else if ( info > 0 && lwork == -1 ) {
|
|
kusano |
7d535a |
printf("** Estimated memory: %d bytes\n", info - n);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( options.PrintStat ) StatPrint(&stat);
|
|
kusano |
7d535a |
StatFree(&stat);
|
|
kusano |
7d535a |
Destroy_CompCol_Matrix(&A);
|
|
kusano |
7d535a |
Destroy_Dense_Matrix(&B);
|
|
kusano |
7d535a |
if ( lwork >= 0 ) { /* Deallocate storage associated with L and U. */
|
|
kusano |
7d535a |
Destroy_SuperNode_Matrix(&L);
|
|
kusano |
7d535a |
Destroy_CompCol_Matrix(&U);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*
|
|
kusano |
7d535a |
NOW WE SOLVE ANOTHER LINEAR SYSTEM: A1*X = B1
|
|
kusano |
7d535a |
ONLY THE SPARSITY PATTERN OF A1 IS THE SAME AS THAT OF A.
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
options.Fact = SamePattern;
|
|
kusano |
7d535a |
StatInit(&stat);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
dCreate_CompCol_Matrix(&A1, m, n, nnz, a1, asub1, xa1,
|
|
kusano |
7d535a |
SLU_NC, SLU_D, SLU_GE);
|
|
kusano |
7d535a |
dCreate_Dense_Matrix(&B1, m, nrhs, rhsb1, m, SLU_DN, SLU_D, SLU_GE);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
dgssvx(&options, &A1, perm_c, perm_r, etree, equed, R, C,
|
|
kusano |
7d535a |
&L, &U, work, lwork, &B1, &X, &rpg, &rcond, ferr, berr,
|
|
kusano |
7d535a |
&mem_usage, &stat, &info);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
printf("\nSecond system: dgssvx() returns info %d\n", info);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( info == 0 || info == n+1 ) {
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
double *sol = (double*) ((DNformat*) X.Store)->nzval;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( options.PivotGrowth ) printf("Recip. pivot growth = %e\n", rpg);
|
|
kusano |
7d535a |
if ( options.ConditionNumber )
|
|
kusano |
7d535a |
printf("Recip. condition number = %e\n", rcond);
|
|
kusano |
7d535a |
Lstore = (SCformat *) L.Store;
|
|
kusano |
7d535a |
Ustore = (NCformat *) U.Store;
|
|
kusano |
7d535a |
printf("No of nonzeros in factor L = %d\n", Lstore->nnz);
|
|
kusano |
7d535a |
printf("No of nonzeros in factor U = %d\n", Ustore->nnz);
|
|
kusano |
7d535a |
printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n);
|
|
kusano |
7d535a |
printf("L\\U MB %.3f\ttotal MB needed %.3f\n",
|
|
kusano |
7d535a |
mem_usage.for_lu/1e6, mem_usage.total_needed/1e6);
|
|
kusano |
7d535a |
if ( options.IterRefine ) {
|
|
kusano |
7d535a |
printf("Iterative Refinement:\n");
|
|
kusano |
7d535a |
printf("%8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR");
|
|
kusano |
7d535a |
for (i = 0; i < nrhs; ++i)
|
|
kusano |
7d535a |
printf("%8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr[i], berr[i]);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
fflush(stdout);
|
|
kusano |
7d535a |
} else if ( info > 0 && lwork == -1 ) {
|
|
kusano |
7d535a |
printf("** Estimated memory: %d bytes\n", info - n);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( options.PrintStat ) StatPrint(&stat);
|
|
kusano |
7d535a |
StatFree(&stat);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
SUPERLU_FREE (xact);
|
|
kusano |
7d535a |
SUPERLU_FREE (etree);
|
|
kusano |
7d535a |
SUPERLU_FREE (perm_r);
|
|
kusano |
7d535a |
SUPERLU_FREE (perm_c);
|
|
kusano |
7d535a |
SUPERLU_FREE (R);
|
|
kusano |
7d535a |
SUPERLU_FREE (C);
|
|
kusano |
7d535a |
SUPERLU_FREE (ferr);
|
|
kusano |
7d535a |
SUPERLU_FREE (berr);
|
|
kusano |
7d535a |
Destroy_CompCol_Matrix(&A1);
|
|
kusano |
7d535a |
Destroy_Dense_Matrix(&B1);
|
|
kusano |
7d535a |
Destroy_Dense_Matrix(&X);
|
|
kusano |
7d535a |
if ( lwork == 0 ) {
|
|
kusano |
7d535a |
Destroy_SuperNode_Matrix(&L);
|
|
kusano |
7d535a |
Destroy_CompCol_Matrix(&U);
|
|
kusano |
7d535a |
} else if ( lwork > 0 ) {
|
|
kusano |
7d535a |
SUPERLU_FREE(work);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
#if ( DEBUGlevel>=1 )
|
|
kusano |
7d535a |
CHECK_MALLOC("Exit main()");
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*
|
|
kusano |
7d535a |
* Parse command line options to get relaxed snode size, panel size, etc.
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
void
|
|
kusano |
7d535a |
parse_command_line(int argc, char *argv[], int *lwork,
|
|
kusano |
7d535a |
double *u, yes_no_t *equil, trans_t *trans )
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
int c;
|
|
kusano |
7d535a |
extern char *optarg;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
while ( (c = getopt(argc, argv, "hl:u:e:t:")) != EOF ) {
|
|
kusano |
7d535a |
switch (c) {
|
|
kusano |
7d535a |
case 'h':
|
|
kusano |
7d535a |
printf("Options:\n");
|
|
kusano |
7d535a |
printf("\t-l <int> - length of work[*] array\n");</int>
|
|
kusano |
7d535a |
printf("\t-u <int> - pivoting threshold\n");</int>
|
|
kusano |
7d535a |
printf("\t-e <0 or 1> - equilibrate or not\n");
|
|
kusano |
7d535a |
printf("\t-t <0 or 1> - solve transposed system or not\n");
|
|
kusano |
7d535a |
exit(1);
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case 'l': *lwork = atoi(optarg);
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case 'u': *u = atof(optarg);
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case 'e': *equil = atoi(optarg);
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case 't': *trans = atoi(optarg);
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|