kusano 7d535a
kusano 7d535a
#include "slu_sdefs.h"
kusano 7d535a
/*
kusano 7d535a
 * -- SuperLU routine (version 4.1) --
kusano 7d535a
 * Lawrence Berkeley National Laboratory
kusano 7d535a
 * November, 2010
kusano 7d535a
 */
kusano 7d535a
kusano 7d535a
int *GLOBAL_PERM_C, *GLOBAL_PERM_R;
kusano 7d535a
SuperMatrix *GLOBAL_A, *GLOBAL_L, *GLOBAL_U;
kusano 7d535a
SuperLUStat_t *GLOBAL_STAT;
kusano 7d535a
kusano 7d535a
void smatvec_mult(float alpha, float x[], float beta, float y[])
kusano 7d535a
{
kusano 7d535a
    SuperMatrix *A = GLOBAL_A;
kusano 7d535a
kusano 7d535a
    sp_sgemv("N", alpha, A, x, 1, beta, y, 1);
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
void spsolve(int n, float x[], float y[])
kusano 7d535a
{
kusano 7d535a
    extern void scopy_(int *, float [], int *, float [], int *);
kusano 7d535a
kusano 7d535a
    int i_1 = 1;
kusano 7d535a
    SuperMatrix *L = GLOBAL_L, *U = GLOBAL_U;
kusano 7d535a
    SuperLUStat_t *stat = GLOBAL_STAT;
kusano 7d535a
    int *perm_c = GLOBAL_PERM_C, *perm_r = GLOBAL_PERM_R;
kusano 7d535a
    int info;
kusano 7d535a
    static DNformat X;
kusano 7d535a
    static SuperMatrix XX = {SLU_DN, SLU_S, SLU_GE, 1, 1, &X};
kusano 7d535a
kusano 7d535a
    scopy_(&n, y, &i_1, x, &i_1);
kusano 7d535a
    XX.nrow = n;
kusano 7d535a
    X.lda = n;
kusano 7d535a
    X.nzval = x;
kusano 7d535a
    sgstrs(NOTRANS, L, U, perm_c, perm_r, &XX, stat, &info);
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
int main(int argc, char *argv[])
kusano 7d535a
{
kusano 7d535a
    void smatvec_mult(float alpha, float x[], float beta, float y[]);
kusano 7d535a
    void spsolve(int n, float x[], float y[]);
kusano 7d535a
    extern int sfgmr( int n,
kusano 7d535a
	void (*matvec_mult)(float, float [], float, float []),
kusano 7d535a
	void (*psolve)(int n, float [], float[]),
kusano 7d535a
	float *rhs, float *sol, double tol, int restrt, int *itmax,
kusano 7d535a
	FILE *fits);
kusano 7d535a
    extern int sfill_diag(int n, NCformat *Astore);
kusano 7d535a
kusano 7d535a
    char     equed[1] = {'B'};
kusano 7d535a
    yes_no_t equil;
kusano 7d535a
    trans_t  trans;
kusano 7d535a
    SuperMatrix A, L, U;
kusano 7d535a
    SuperMatrix B, X;
kusano 7d535a
    NCformat *Astore;
kusano 7d535a
    NCformat *Ustore;
kusano 7d535a
    SCformat *Lstore;
kusano 7d535a
    float   *a;
kusano 7d535a
    int      *asub, *xa;
kusano 7d535a
    int      *etree;
kusano 7d535a
    int      *perm_c; /* column permutation vector */
kusano 7d535a
    int      *perm_r; /* row permutations from partial pivoting */
kusano 7d535a
    int      nrhs, ldx, lwork, info, m, n, nnz;
kusano 7d535a
    float   *rhsb, *rhsx, *xact;
kusano 7d535a
    float   *work = NULL;
kusano 7d535a
    float   *R, *C;
kusano 7d535a
    float   u, rpg, rcond;
kusano 7d535a
    float zero = 0.0;
kusano 7d535a
    float one = 1.0;
kusano 7d535a
    mem_usage_t   mem_usage;
kusano 7d535a
    superlu_options_t options;
kusano 7d535a
    SuperLUStat_t stat;
kusano 7d535a
kusano 7d535a
    int restrt, iter, maxit, i;
kusano 7d535a
    double resid;
kusano 7d535a
    float *x, *b;
kusano 7d535a
kusano 7d535a
#ifdef DEBUG
kusano 7d535a
    extern int num_drop_L, num_drop_U;
kusano 7d535a
#endif
kusano 7d535a
kusano 7d535a
#if ( DEBUGlevel>=1 )
kusano 7d535a
    CHECK_MALLOC("Enter main()");
kusano 7d535a
#endif
kusano 7d535a
kusano 7d535a
    /* Defaults */
kusano 7d535a
    lwork = 0;
kusano 7d535a
    nrhs  = 1;
kusano 7d535a
    equil = YES;
kusano 7d535a
    u	  = 0.1; /* u=1.0 for complete factorization */
kusano 7d535a
    trans = NOTRANS;
kusano 7d535a
kusano 7d535a
    /* Set the default input options:
kusano 7d535a
	options.Fact = DOFACT;
kusano 7d535a
	options.Equil = YES;
kusano 7d535a
	options.ColPerm = COLAMD;
kusano 7d535a
	options.DiagPivotThresh = 0.1; //different from complete LU
kusano 7d535a
	options.Trans = NOTRANS;
kusano 7d535a
	options.IterRefine = NOREFINE;
kusano 7d535a
	options.SymmetricMode = NO;
kusano 7d535a
	options.PivotGrowth = NO;
kusano 7d535a
	options.ConditionNumber = NO;
kusano 7d535a
	options.PrintStat = YES;
kusano 7d535a
	options.RowPerm = LargeDiag;
kusano 7d535a
	options.ILU_DropTol = 1e-4;
kusano 7d535a
	options.ILU_FillTol = 1e-2;
kusano 7d535a
	options.ILU_FillFactor = 10.0;
kusano 7d535a
	options.ILU_DropRule = DROP_BASIC | DROP_AREA;
kusano 7d535a
	options.ILU_Norm = INF_NORM;
kusano 7d535a
	options.ILU_MILU = SILU;
kusano 7d535a
     */
kusano 7d535a
    ilu_set_default_options(&options);
kusano 7d535a
kusano 7d535a
    /* Modify the defaults. */
kusano 7d535a
    options.PivotGrowth = YES;	  /* Compute reciprocal pivot growth */
kusano 7d535a
    options.ConditionNumber = YES;/* Compute reciprocal condition number */
kusano 7d535a
kusano 7d535a
    if ( lwork > 0 ) {
kusano 7d535a
	work = SUPERLU_MALLOC(lwork);
kusano 7d535a
	if ( !work ) ABORT("Malloc fails for work[].");
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    /* Read matrix A from a file in Harwell-Boeing format.*/
kusano 7d535a
    if (argc < 2)
kusano 7d535a
    {
kusano 7d535a
	printf("Usage:\n%s [OPTION] < [INPUT] > [OUTPUT]\nOPTION:\n"
kusano 7d535a
		"-h -hb:\n\t[INPUT] is a Harwell-Boeing format matrix.\n"
kusano 7d535a
		"-r -rb:\n\t[INPUT] is a Rutherford-Boeing format matrix.\n"
kusano 7d535a
		"-t -triplet:\n\t[INPUT] is a triplet format matrix.\n",
kusano 7d535a
		argv[0]);
kusano 7d535a
	return 0;
kusano 7d535a
    }
kusano 7d535a
    else
kusano 7d535a
    {
kusano 7d535a
	switch (argv[1][1])
kusano 7d535a
	{
kusano 7d535a
	    case 'H':
kusano 7d535a
	    case 'h':
kusano 7d535a
		printf("Input a Harwell-Boeing format matrix:\n");
kusano 7d535a
		sreadhb(&m, &n, &nnz, &a, &asub, &xa);
kusano 7d535a
		break;
kusano 7d535a
	    case 'R':
kusano 7d535a
	    case 'r':
kusano 7d535a
		printf("Input a Rutherford-Boeing format matrix:\n");
kusano 7d535a
		sreadrb(&m, &n, &nnz, &a, &asub, &xa);
kusano 7d535a
		break;
kusano 7d535a
	    case 'T':
kusano 7d535a
	    case 't':
kusano 7d535a
		printf("Input a triplet format matrix:\n");
kusano 7d535a
		sreadtriple(&m, &n, &nnz, &a, &asub, &xa);
kusano 7d535a
		break;
kusano 7d535a
	    default:
kusano 7d535a
		printf("Unrecognized format.\n");
kusano 7d535a
		return 0;
kusano 7d535a
	}
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    sCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_S, SLU_GE);
kusano 7d535a
    Astore = A.Store;
kusano 7d535a
    sfill_diag(n, Astore);
kusano 7d535a
    printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz);
kusano 7d535a
    fflush(stdout);
kusano 7d535a
kusano 7d535a
    if ( !(rhsb = floatMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb[].");
kusano 7d535a
    if ( !(rhsx = floatMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsx[].");
kusano 7d535a
    sCreate_Dense_Matrix(&B, m, nrhs, rhsb, m, SLU_DN, SLU_S, SLU_GE);
kusano 7d535a
    sCreate_Dense_Matrix(&X, m, nrhs, rhsx, m, SLU_DN, SLU_S, SLU_GE);
kusano 7d535a
    xact = floatMalloc(n * nrhs);
kusano 7d535a
    ldx = n;
kusano 7d535a
    sGenXtrue(n, nrhs, xact, ldx);
kusano 7d535a
    sFillRHS(trans, nrhs, xact, ldx, &A, &B);
kusano 7d535a
kusano 7d535a
    if ( !(etree = intMalloc(n)) ) ABORT("Malloc fails for etree[].");
kusano 7d535a
    if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[].");
kusano 7d535a
    if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[].");
kusano 7d535a
    if ( !(R = (float *) SUPERLU_MALLOC(A.nrow * sizeof(float))) )
kusano 7d535a
	ABORT("SUPERLU_MALLOC fails for R[].");
kusano 7d535a
    if ( !(C = (float *) SUPERLU_MALLOC(A.ncol * sizeof(float))) )
kusano 7d535a
	ABORT("SUPERLU_MALLOC fails for C[].");
kusano 7d535a
kusano 7d535a
    info = 0;
kusano 7d535a
#ifdef DEBUG
kusano 7d535a
    num_drop_L = 0;
kusano 7d535a
    num_drop_U = 0;
kusano 7d535a
#endif
kusano 7d535a
kusano 7d535a
    /* Initialize the statistics variables. */
kusano 7d535a
    StatInit(&stat);
kusano 7d535a
kusano 7d535a
    /* Compute the incomplete factorization and compute the condition number
kusano 7d535a
       and pivot growth using dgsisx. */
kusano 7d535a
    sgsisx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work,
kusano 7d535a
	   lwork, &B, &X, &rpg, &rcond, &mem_usage, &stat, &info);
kusano 7d535a
kusano 7d535a
    Lstore = (SCformat *) L.Store;
kusano 7d535a
    Ustore = (NCformat *) U.Store;
kusano 7d535a
    printf("sgsisx(): info %d\n", info);
kusano 7d535a
    if (info > 0 || rcond < 1e-8 || rpg > 1e8)
kusano 7d535a
	printf("WARNING: This preconditioner might be unstable.\n");
kusano 7d535a
kusano 7d535a
    if ( info == 0 || info == n+1 ) {
kusano 7d535a
kusano 7d535a
	if ( options.PivotGrowth == YES )
kusano 7d535a
	    printf("Recip. pivot growth = %e\n", rpg);
kusano 7d535a
	if ( options.ConditionNumber == YES )
kusano 7d535a
	    printf("Recip. condition number = %e\n", rcond);
kusano 7d535a
kusano 7d535a
    } else if ( info > 0 && lwork == -1 ) {
kusano 7d535a
	printf("** Estimated memory: %d bytes\n", info - n);
kusano 7d535a
    }
kusano 7d535a
    printf("n(A) = %d, nnz(A) = %d\n", n, Astore->nnz);
kusano 7d535a
    printf("No of nonzeros in factor L = %d\n", Lstore->nnz);
kusano 7d535a
    printf("No of nonzeros in factor U = %d\n", Ustore->nnz);
kusano 7d535a
    printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n);
kusano 7d535a
    printf("Fill ratio: nnz(F)/nnz(A) = %.3f\n",
kusano 7d535a
	    ((double)(Lstore->nnz) + (double)(Ustore->nnz) - (double)n)
kusano 7d535a
	    / (double)Astore->nnz);
kusano 7d535a
    printf("L\\U MB %.3f\ttotal MB needed %.3f\n",
kusano 7d535a
	   mem_usage.for_lu/1e6, mem_usage.total_needed/1e6);
kusano 7d535a
    fflush(stdout);
kusano 7d535a
kusano 7d535a
    /* Set the global variables. */
kusano 7d535a
    GLOBAL_A = &A;
kusano 7d535a
    GLOBAL_L = &L;
kusano 7d535a
    GLOBAL_U = &U;
kusano 7d535a
    GLOBAL_STAT = &stat;
kusano 7d535a
    GLOBAL_PERM_C = perm_c;
kusano 7d535a
    GLOBAL_PERM_R = perm_r;
kusano 7d535a
kusano 7d535a
    /* Set the variables used by GMRES. */
kusano 7d535a
    restrt = SUPERLU_MIN(n / 3 + 1, 50);
kusano 7d535a
    maxit = 1000;
kusano 7d535a
    iter = maxit;
kusano 7d535a
    resid = 1e-8;
kusano 7d535a
    if (!(b = floatMalloc(m))) ABORT("Malloc fails for b[].");
kusano 7d535a
    if (!(x = floatMalloc(n))) ABORT("Malloc fails for x[].");
kusano 7d535a
kusano 7d535a
    if (info <= n + 1)
kusano 7d535a
    {
kusano 7d535a
	int i_1 = 1;
kusano 7d535a
	double maxferr = 0.0, nrmA, nrmB, res, t;
kusano 7d535a
        float temp;
kusano 7d535a
	extern float snrm2_(int *, float [], int *);
kusano 7d535a
	extern void saxpy_(int *, float *, float [], int *, float [], int *);
kusano 7d535a
kusano 7d535a
	/* Call GMRES. */
kusano 7d535a
	for (i = 0; i < n; i++) b[i] = rhsb[i];
kusano 7d535a
	for (i = 0; i < n; i++) x[i] = zero;
kusano 7d535a
kusano 7d535a
	t = SuperLU_timer_();
kusano 7d535a
kusano 7d535a
	sfgmr(n, smatvec_mult, spsolve, b, x, resid, restrt, &iter, stdout);
kusano 7d535a
kusano 7d535a
	t = SuperLU_timer_() - t;
kusano 7d535a
kusano 7d535a
	/* Output the result. */
kusano 7d535a
	nrmA = snrm2_(&(Astore->nnz), (float *)((DNformat *)A.Store)->nzval,
kusano 7d535a
		&i_1);
kusano 7d535a
	nrmB = snrm2_(&m, b, &i_1);
kusano 7d535a
	sp_sgemv("N", -1.0, &A, x, 1, 1.0, b, 1);
kusano 7d535a
	res = snrm2_(&m, b, &i_1);
kusano 7d535a
	resid = res / nrmB;
kusano 7d535a
	printf("||A||_F = %.1e, ||B||_2 = %.1e, ||B-A*X||_2 = %.1e, "
kusano 7d535a
		"relres = %.1e\n", nrmA, nrmB, res, resid);
kusano 7d535a
kusano 7d535a
	if (iter >= maxit)
kusano 7d535a
	{
kusano 7d535a
	    if (resid >= 1.0) iter = -180;
kusano 7d535a
	    else if (resid > 1e-8) iter = -111;
kusano 7d535a
	}
kusano 7d535a
	printf("iteration: %d\nresidual: %.1e\nGMRES time: %.2f seconds.\n",
kusano 7d535a
		iter, resid, t);
kusano 7d535a
kusano 7d535a
	/* Scale the solution back if equilibration was performed. */
kusano 7d535a
	if (*equed == 'C' || *equed == 'B') 
kusano 7d535a
	    for (i = 0; i < n; i++) x[i] *= C[i];
kusano 7d535a
kusano 7d535a
	for (i = 0; i < m; i++) {
kusano 7d535a
	    maxferr = SUPERLU_MAX(maxferr, fabs(x[i] - xact[i]));
kusano 7d535a
        }
kusano 7d535a
	printf("||X-X_true||_oo = %.1e\n", maxferr);
kusano 7d535a
    }
kusano 7d535a
#ifdef DEBUG
kusano 7d535a
    printf("%d entries in L and %d entries in U dropped.\n",
kusano 7d535a
	    num_drop_L, num_drop_U);
kusano 7d535a
#endif
kusano 7d535a
    fflush(stdout);
kusano 7d535a
kusano 7d535a
    if ( options.PrintStat ) StatPrint(&stat);
kusano 7d535a
    StatFree(&stat);
kusano 7d535a
kusano 7d535a
    SUPERLU_FREE (rhsb);
kusano 7d535a
    SUPERLU_FREE (rhsx);
kusano 7d535a
    SUPERLU_FREE (xact);
kusano 7d535a
    SUPERLU_FREE (etree);
kusano 7d535a
    SUPERLU_FREE (perm_r);
kusano 7d535a
    SUPERLU_FREE (perm_c);
kusano 7d535a
    SUPERLU_FREE (R);
kusano 7d535a
    SUPERLU_FREE (C);
kusano 7d535a
    Destroy_CompCol_Matrix(&A);
kusano 7d535a
    Destroy_SuperMatrix_Store(&B);
kusano 7d535a
    Destroy_SuperMatrix_Store(&X);
kusano 7d535a
    if ( lwork >= 0 ) {
kusano 7d535a
	Destroy_SuperNode_Matrix(&L);
kusano 7d535a
	Destroy_CompCol_Matrix(&U);
kusano 7d535a
    }
kusano 7d535a
    SUPERLU_FREE(b);
kusano 7d535a
    SUPERLU_FREE(x);
kusano 7d535a
kusano 7d535a
#if ( DEBUGlevel>=1 )
kusano 7d535a
    CHECK_MALLOC("Exit main()");
kusano 7d535a
#endif
kusano 7d535a
kusano 7d535a
    return 0;
kusano 7d535a
}