/*
 * -- SuperLU routine (version 4.0) --
 * Univ. of California Berkeley, Xerox Palo Alto Research Center,
 * and Lawrence Berkeley National Lab.
 * June 30, 2009
 *
 */
#include <stdio.h>
#include "mex.h"
#include "slu_ddefs.h"


/*
 * Name of the MATLAB array type used throughout this file: mxArray when
 * V5 is defined at compile time, Matrix otherwise (the V4 branch).
 */
#ifdef V5
#define  MatlabMatrix mxArray
#else    /* V4 */
#define  MatlabMatrix Matrix
#endif



/*
 * Aliases for input and output arguments.
 * They expand to slots of the prhs[] / plhs[] arrays, so they are only
 * meaningful inside a function that has parameters with those names.
 *   inputs : A_in  = prhs[0], Pc_in = prhs[1]
 *   outputs: L_out = plhs[0], U_out = plhs[1], Pr_out = plhs[2], Pc_out = plhs[3]
 */
#define A_in		prhs[0]
#define Pc_in		prhs[1]
#define L_out    	plhs[0]
#define U_out          	plhs[1]
#define Pr_out     	plhs[2]
#define Pc_out   	plhs[3]

/*
 * Forward declaration of LUextract (defined later in this file).
 * Argument order: L, U, Lval, Lrow, Lcol, Uval, Urow, Ucol, snnzL, snnzU.
 */
void LUextract(SuperMatrix *, SuperMatrix *, double *, int *, int *, 
	       double *, int *, int *, int *, int*);

/*
 * Verbosity predicates. Each expands to a comparison against a variable
 * named SPUMONI, which must be in scope wherever the macro is used:
 *   verbose -> SPUMONI > 0, babble -> SPUMONI > 1, burble -> SPUMONI > 2.
 */
#define verbose (SPUMONI>0)
#define babble  (SPUMONI>1)
#define burble  (SPUMONI>2)

/*
 * MATLAB entry point: LU-factorize a sparse matrix with SuperLU's dgstrf().
 *
 * Inputs (exactly 2):
 *   A_in  (prhs[0])  sparse matrix; its value, row-index and column-pointer
 *                    arrays are handed to SuperLU without being copied.
 *   Pc_in (prhs[1])  column permutation. Only its row-index array
 *                    (mxGetIr) is read and used as perm_c, so it is
 *                    presumably a sparse permutation matrix -- confirm
 *                    against the MATLAB-side caller.
 *
 * Outputs (exactly 4):
 *   L_out, U_out     m-by-n sparse factors filled in by LUextract().
 *   Pr_out           m-by-1 double vector holding perm_r[i] + 1.
 *   Pc_out           n-by-1 double vector holding perm_c[i] + 1.
 *
 * Errors: raises a MATLAB error if the argument counts are wrong or if
 * dgstrf() returns info outside [0, n].
 *
 * Resource handling: every SuperLU object that this function is known to
 * have created (the store wrapping A, the permuted matrix Ac, the
 * statistics block) is released before the error is reported, because
 * mexErrMsgTxt() does not return to the caller.
 *
 * NOTE(review): the pointers returned by mxGetIr()/mxGetJc() are stored in
 * int* variables. That is only valid when MATLAB's index type has the same
 * width as int -- confirm the build configuration.
 */
void mexFunction(
    int          nlhs,           /* number of expected outputs */
    MatlabMatrix *plhs[],        /* matrix pointer array returning outputs */
    int          nrhs,           /* number of inputs */
#ifdef V5
    const MatlabMatrix *prhs[]   /* matrix pointer array for inputs */
#else /* V4 */
    MatlabMatrix *prhs[]         /* matrix pointer array for inputs */
#endif
    )
{
    int SPUMONI;             /* sparse monitor flag; read by verbose/babble */
#ifdef V5
    double FlopsInSuperLU;   /* flop counter (only consumed by disabled code) */
#else
    Real FlopsInSuperLU;     /* flop counter (only consumed by disabled code) */
#endif
    extern flops_t LUFactFlops(SuperLUStat_t *);

    /* Arguments to C dgstrf(). */
    SuperMatrix A;
    SuperMatrix Ac;          /* Matrix postmultiplied by Pc */
    SuperMatrix L, U;
    int         m, n, nnz;
    double      *val;
    int         *rowind;
    int         *colptr;
    int         *etree, *perm_r, *perm_c;
    int         panel_size, relax;
    double      thresh = 1.0;       /* diagonal pivoting threshold */
    int         info;
    int         factor_ok;          /* nonzero when info lies in [0, n] */
    MatlabMatrix *X, *Y;            /* args to calls back to Matlab */
    int         i, mexerr;
    double      *dp;
    double      *Lval, *Uval;
    int         *Lrow, *Urow;
    int         *Lcol, *Ucol;
    int         nnzL, nnzU, snnzL, snnzU;
    superlu_options_t options;
    SuperLUStat_t stat;

    /* Check number of arguments passed from Matlab. */
    if (nrhs != 2) {
        mexErrMsgTxt("SUPERLU requires 2 input arguments.");
    } else if (nlhs != 4) {
        mexErrMsgTxt("SUPERLU requires 4 output arguments.");
    }

    /* Read the Sparse Monitor Flag */
    X = mxCreateString("spumoni");
    mexerr = mexCallMATLAB(1, &Y, 1, &X, "sparsfun");
    SPUMONI = mxGetScalar(Y);
#ifdef V5
    mxDestroyArray(Y);
    mxDestroyArray(X);
#else
    mxFreeMatrix(Y);
    mxFreeMatrix(X);
#endif

    /* Wrap MATLAB's storage for A in a SuperLU matrix header (no copy). */
    m = mxGetM(A_in);
    n = mxGetN(A_in);
    etree = (int *) mxCalloc(n, sizeof(int));
    perm_r = (int *) mxCalloc(m, sizeof(int));
    perm_c = mxGetIr(Pc_in);
    val = mxGetPr(A_in);
    rowind = mxGetIr(A_in);
    colptr = mxGetJc(A_in);
    nnz = colptr[n];
    dCreate_CompCol_Matrix(&A, m, n, nnz, val, rowind, colptr,
                           SLU_NC, SLU_D, SLU_GE);
    panel_size = sp_ienv(1);
    relax      = sp_ienv(2);
    FlopsInSuperLU = 0;

    set_default_options(&options);
    StatInit(&stat);

    if ( verbose ) mexPrintf("Apply column perm to A and compute etree...\n");
    sp_preorder(&options, &A, perm_c, etree, &Ac);

    if ( verbose ) {
        /* thresh is reported only; it is not an argument of the dgstrf()
           call below. */
        mexPrintf("LU factorization...\n");
        mexPrintf("\tpanel_size %d, relax %d, diag_pivot_thresh %.2g\n",
                  panel_size, relax, thresh);
    }

    dgstrf(&options, &Ac, relax, panel_size, etree,
           NULL, 0, perm_c, perm_r, &L, &U, &stat, &info);

    if ( verbose ) mexPrintf("INFO from dgstrf %d\n", info);

#if 0 /* FLOPS is not available in the new Matlab. */
    /* Tell Matlab how many flops we did. */
    FlopsInSuperLU += LUFactFlops(&stat);
    if (verbose) mexPrintf("SUPERLU flops: %.f\n", FlopsInSuperLU);
    mexerr = mexCallMATLAB(1, &X, 0, NULL, "flops");
    *(mxGetPr(X)) += FlopsInSuperLU;
    mexerr = mexCallMATLAB(1, &Y, 1, &X, "flops");
#ifdef V5
    mxDestroyArray(Y);
    mxDestroyArray(X);
#else
    mxFreeMatrix(Y);
    mxFreeMatrix(X);
#endif
#endif

    /* Construct output arguments for Matlab. */
    factor_ok = (info >= 0 && info <= n);
    if ( factor_ok ) {
        /* Row permutation, shifted to MATLAB's 1-based indexing. */
#ifdef V5
        Pr_out = mxCreateDoubleMatrix(m, 1, mxREAL);
#else
        Pr_out = mxCreateFull(m, 1, REAL);
#endif
        dp = mxGetPr(Pr_out);
        for (i = 0; i < m; ++i) {
            dp[i] = (double) perm_r[i] + 1;
        }

        /* Column permutation, shifted to MATLAB's 1-based indexing. */
#ifdef V5
        Pc_out = mxCreateDoubleMatrix(n, 1, mxREAL);
#else
        Pc_out = mxCreateFull(n, 1, REAL);
#endif
        dp = mxGetPr(Pc_out);
        for (i = 0; i < n; ++i) {
            dp[i] = (double) perm_c[i] + 1;
        }

        /* Now for L and U */
        nnzL = ((SCformat*)L.Store)->nnz; /* count diagonals */
        nnzU = ((NCformat*)U.Store)->nnz;

#ifdef V5
        L_out = mxCreateSparse(m, n, nnzL, mxREAL);
#else
        L_out = mxCreateSparse(m, n, nnzL, REAL);
#endif
        Lval = mxGetPr(L_out);
        Lrow = mxGetIr(L_out);
        Lcol = mxGetJc(L_out);

#ifdef V5
        U_out = mxCreateSparse(m, n, nnzU, mxREAL);
#else
        U_out = mxCreateSparse(m, n, nnzU, REAL);
#endif
        Uval = mxGetPr(U_out);
        Urow = mxGetIr(U_out);
        Ucol = mxGetJc(U_out);

        LUextract(&L, &U, Lval, Lrow, Lcol, Uval, Urow, Ucol, &snnzL, &snnzU);

        /* L and U are released only on this path, as before: nothing in
           this file shows whether dgstrf() populated them on failure. */
        Destroy_SuperNode_Matrix(&L);
        Destroy_CompCol_Matrix(&U);

        if (babble) mexPrintf("factor nonzeros: %d unsqueezed, %d squeezed.\n",
                              nnzL + nnzU, snnzL + snnzU);
    }

    /*
     * Cleanup shared by the success and failure paths. It must run before
     * the error is reported because mexErrMsgTxt() does not return.
     * Destroy_SuperMatrix_Store() releases only the header created by
     * dCreate_CompCol_Matrix(); val/rowind/colptr still belong to MATLAB.
     */
    Destroy_CompCol_Permuted(&Ac);
    Destroy_SuperMatrix_Store(&A);
    mxFree(etree);
    mxFree(perm_r);
    StatFree(&stat);

    if ( !factor_ok ) {
        mexErrMsgTxt("Error returned from C dgstrf().");
    }
    return;
}

/*
 * Copy the factors held in the SuperLU matrices L and U into two sets of
 * compressed-column arrays (values, row indices, column pointers).
 *
 *   L, U              factors to read.
 *   Lval, Lrow, Lcol  receive the L factor; every column starts with an
 *                     explicit 1.0 for the unit diagonal.
 *   Uval, Urow, Ucol  receive the U factor: the entries stored in U followed
 *                     by the upper-triangular part kept inside L's supernode.
 *   snnzL, snnzU      receive the number of entries actually kept.
 *
 * Entries equal to 0.0 are dropped (the unit diagonal of L is always kept).
 * A dropped value is still written to the current output slot before the
 * test, and is overwritten by the next kept entry, if there is one.
 *
 * NOTE(review): the L_* and U_* macros take no store argument, so they
 * presumably refer to locals named Lstore and Ustore -- keep those names.
 */
void
LUextract(SuperMatrix *L, SuperMatrix *U, double *Lval, int *Lrow,
	  int *Lcol, double *Uval, int *Urow, int *Ucol, int *snnzL,
	  int *snnzU)
{
    SCformat    *Lstore = L->Store;
    NCformat    *Ustore = U->Store;
    double      *l_nz = (double *) Lstore->nzval;
    double      *u_nz = (double *) Ustore->nzval;
    int         n_l = 0;        /* entries written to Lval/Lrow so far */
    int         n_u = 0;        /* entries written to Uval/Urow so far */
    int         sn;

    Lcol[0] = 0;
    Ucol[0] = 0;

    /* Walk the supernodes; all columns of one supernode share a row list. */
    for (sn = 0; sn <= Lstore->nsuper; ++sn) {
	int first_col = L_FST_SUPC(sn);
	int end_col   = L_FST_SUPC(sn + 1);
	int row_base  = L_SUB_START(first_col);
	int n_rows    = L_SUB_START(first_col + 1) - row_base;
	int col;

	for (col = first_col; col < end_col; ++col) {
	    double *sn_col = l_nz + L_NZ_START(col);
	    int    diag    = col - first_col;  /* diagonal's offset in sn_col */
	    int    p, r;

	    /* U, part 1: entries stored in U itself. */
	    for (p = U_NZ_START(col); p < U_NZ_START(col + 1); ++p) {
		double v = u_nz[p];
		Uval[n_u] = v;
		if (v != 0.0) {	/* Matlab doesn't like explicit zero. */
		    Urow[n_u] = U_SUB(p);
		    ++n_u;
		}
	    }

	    /* U, part 2: rows 0..diag of this column inside the supernode. */
	    for (r = 0; r <= diag; ++r) {
		double v = sn_col[r];
		Uval[n_u] = v;
		if (v != 0.0) {	/* Matlab doesn't like explicit zero. */
		    Urow[n_u] = L_SUB(row_base + r);
		    ++n_u;
		}
	    }
	    Ucol[col + 1] = n_u;

	    /* L: unit diagonal first, then the rows below the diagonal. */
	    Lval[n_l] = 1.0;
	    Lrow[n_l] = L_SUB(row_base + diag);
	    ++n_l;
	    for (r = diag + 1; r < n_rows; ++r) {
		double v = sn_col[r];
		Lval[n_l] = v;
		if (v != 0.0) {	/* Matlab doesn't like explicit zero. */
		    Lrow[n_l] = L_SUB(row_base + r);
		    ++n_l;
		}
	    }
	    Lcol[col + 1] = n_l;
	}
    }

    *snnzL = n_l;
    *snnzU = n_u;
}