kusano 7d535a
kusano 7d535a
/*! @file cgsitf.c
kusano 7d535a
 * \brief Computes an ILU factorization of a general sparse matrix
kusano 7d535a
 *
kusano 7d535a
 * 
kusano 7d535a
 * -- SuperLU routine (version 4.0) --
kusano 7d535a
 * Lawrence Berkeley National Laboratory.
kusano 7d535a
 * June 30, 2009
kusano 7d535a
 * 
kusano 7d535a
 */
kusano 7d535a
kusano 7d535a
#include "slu_cdefs.h"
kusano 7d535a
kusano 7d535a
#ifdef DEBUG
kusano 7d535a
int num_drop_L;
kusano 7d535a
#endif
kusano 7d535a
kusano 7d535a
/*! \brief
kusano 7d535a
 *
kusano 7d535a
 * 
kusano 7d535a
 * Purpose
kusano 7d535a
 * =======
kusano 7d535a
 *
kusano 7d535a
 * CGSITRF computes an ILU factorization of a general sparse m-by-n
kusano 7d535a
 * matrix A using partial pivoting with row interchanges.
kusano 7d535a
 * The factorization has the form
kusano 7d535a
 *     Pr * A = L * U
kusano 7d535a
 * where Pr is a row permutation matrix, L is lower triangular with unit
kusano 7d535a
 * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper
kusano 7d535a
 * triangular (upper trapezoidal if A->nrow < A->ncol).
kusano 7d535a
 *
kusano 7d535a
 * See supermatrix.h for the definition of 'SuperMatrix' structure.
kusano 7d535a
 *
kusano 7d535a
 * Arguments
kusano 7d535a
 * =========
kusano 7d535a
 *
kusano 7d535a
 * options (input) superlu_options_t*
kusano 7d535a
 *	   The structure defines the input parameters to control
kusano 7d535a
 *	   how the ILU decomposition will be performed.
kusano 7d535a
 *
kusano 7d535a
 * A	    (input) SuperMatrix*
kusano 7d535a
 *	    Original matrix A, permuted by columns, of dimension
kusano 7d535a
 *	    (A->nrow, A->ncol). The type of A can be:
kusano 7d535a
 *	    Stype = SLU_NCP; Dtype = SLU_C; Mtype = SLU_GE.
kusano 7d535a
 *
kusano 7d535a
 * relax    (input) int
kusano 7d535a
 *	    To control degree of relaxing supernodes. If the number
kusano 7d535a
 *	    of nodes (columns) in a subtree of the elimination tree is less
kusano 7d535a
 *	    than relax, this subtree is considered as one supernode,
kusano 7d535a
 *	    regardless of the row structures of those columns.
kusano 7d535a
 *
kusano 7d535a
 * panel_size (input) int
kusano 7d535a
 *	    A panel consists of at most panel_size consecutive columns.
kusano 7d535a
 *
kusano 7d535a
 * etree    (input) int*, dimension (A->ncol)
kusano 7d535a
 *	    Elimination tree of A'*A.
kusano 7d535a
 *	    Note: etree is a vector of parent pointers for a forest whose
kusano 7d535a
 *	    vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
kusano 7d535a
 *	    On input, the columns of A should be permuted so that the
kusano 7d535a
 *	    etree is in a certain postorder.
kusano 7d535a
 *
kusano 7d535a
 * work     (input/output) void*, size (lwork) (in bytes)
kusano 7d535a
 *	    User-supplied work space and space for the output data structures.
kusano 7d535a
 *	    Not referenced if lwork = 0;
kusano 7d535a
 *
kusano 7d535a
 * lwork   (input) int
kusano 7d535a
 *	   Specifies the size of work array in bytes.
kusano 7d535a
 *	   = 0:  allocate space internally by system malloc;
kusano 7d535a
 *	   > 0:  use user-supplied work array of length lwork in bytes,
kusano 7d535a
 *		 returns error if space runs out.
kusano 7d535a
 *	   = -1: the routine guesses the amount of space needed without
kusano 7d535a
 *		 performing the factorization, and returns it in
kusano 7d535a
 *		 *info; no other side effects.
kusano 7d535a
 *
kusano 7d535a
 * perm_c   (input) int*, dimension (A->ncol)
kusano 7d535a
 *	    Column permutation vector, which defines the
kusano 7d535a
 *	    permutation matrix Pc; perm_c[i] = j means column i of A is
kusano 7d535a
 *	    in position j in A*Pc.
kusano 7d535a
 *	    When searching for diagonal, perm_c[*] is applied to the
kusano 7d535a
 *	    row subscripts of A, so that diagonal threshold pivoting
kusano 7d535a
 *	    can find the diagonal of A, rather than that of A*Pc.
kusano 7d535a
 *
kusano 7d535a
 * perm_r   (input/output) int*, dimension (A->nrow)
kusano 7d535a
 *	    Row permutation vector which defines the permutation matrix Pr,
kusano 7d535a
 *	    perm_r[i] = j means row i of A is in position j in Pr*A.
kusano 7d535a
 *	    If options->Fact = SamePattern_SameRowPerm, the pivoting routine
kusano 7d535a
 *	       will try to use the input perm_r, unless a certain threshold
kusano 7d535a
 *	       criterion is violated. In that case, perm_r is overwritten by
kusano 7d535a
 *	       a new permutation determined by partial pivoting or diagonal
kusano 7d535a
 *	       threshold pivoting.
kusano 7d535a
 *	    Otherwise, perm_r is output argument;
kusano 7d535a
 *
kusano 7d535a
 * L	    (output) SuperMatrix*
kusano 7d535a
 *	    The factor L from the factorization Pr*A=L*U; use compressed row
kusano 7d535a
 *	    subscripts storage for supernodes, i.e., L has type:
kusano 7d535a
 *	    Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
kusano 7d535a
 *
kusano 7d535a
 * U	    (output) SuperMatrix*
kusano 7d535a
 *	    The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
kusano 7d535a
 *	    storage scheme, i.e., U has types: Stype = SLU_NC,
kusano 7d535a
 *	    Dtype = SLU_C, Mtype = SLU_TRU.
kusano 7d535a
 *
kusano 7d535a
 * stat     (output) SuperLUStat_t*
kusano 7d535a
 *	    Record the statistics on runtime and floating-point operation count.
kusano 7d535a
 *	    See slu_util.h for the definition of 'SuperLUStat_t'.
kusano 7d535a
 *
kusano 7d535a
 * info     (output) int*
kusano 7d535a
 *	    = 0: successful exit
kusano 7d535a
 *	    < 0: if info = -i, the i-th argument had an illegal value
kusano 7d535a
 *	    > 0: if info = i, and i is
kusano 7d535a
 *	       <= A->ncol: number of zero pivots. They are replaced by small
kusano 7d535a
 *		  entries according to options->ILU_FillTol.
kusano 7d535a
 *	       > A->ncol: number of bytes allocated when memory allocation
kusano 7d535a
 *		  failure occurred, plus A->ncol. If lwork = -1, it is
kusano 7d535a
 *		  the estimated amount of space needed, plus A->ncol.
kusano 7d535a
 *
kusano 7d535a
 * ======================================================================
kusano 7d535a
 *
kusano 7d535a
 * Local Working Arrays:
kusano 7d535a
 * ======================
kusano 7d535a
 *   m = number of rows in the matrix
kusano 7d535a
 *   n = number of columns in the matrix
kusano 7d535a
 *
kusano 7d535a
 *   marker[0:3*m-1]: marker[i] = j means that node i has been
kusano 7d535a
 *	reached when working on column j.
kusano 7d535a
 *	Storage: relative to original row subscripts
kusano 7d535a
 *	NOTE: There are 4 of them:
kusano 7d535a
 *	      marker/marker1 are used for panel dfs, see (ilu_)dpanel_dfs.c;
kusano 7d535a
 *	      marker2 is used for inner-factorization, see (ilu)_dcolumn_dfs.c;
kusano 7d535a
 *	      marker_relax(has its own space) is used for relaxed supernodes.
kusano 7d535a
 *
kusano 7d535a
 *   parent[0:m-1]: parent vector used during dfs
kusano 7d535a
 *	Storage: relative to new row subscripts
kusano 7d535a
 *
kusano 7d535a
 *   xplore[0:m-1]: xplore[i] gives the location of the next (dfs)
kusano 7d535a
 *	unexplored neighbor of i in lsub[*]
kusano 7d535a
 *
kusano 7d535a
 *   segrep[0:nseg-1]: contains the list of supernodal representatives
kusano 7d535a
 *	in topological order of the dfs. A supernode representative is the
kusano 7d535a
 *	last column of a supernode.
kusano 7d535a
 *	The maximum size of segrep[] is n.
kusano 7d535a
 *
kusano 7d535a
 *   repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a
kusano 7d535a
 *	supernodal representative r, repfnz[r] is the location of the first
kusano 7d535a
 *	nonzero in this segment.  It is also used during the dfs: repfnz[r]>0
kusano 7d535a
 *	indicates the supernode r has been explored.
kusano 7d535a
 *	NOTE: There are W of them, each used for one column of a panel.
kusano 7d535a
 *
kusano 7d535a
 *   panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below
kusano 7d535a
 *	the panel diagonal. These are filled in during dpanel_dfs(), and are
kusano 7d535a
 *	used later in the inner LU factorization within the panel.
kusano 7d535a
 *	panel_lsub[]/dense[] pair forms the SPA data structure.
kusano 7d535a
 *	NOTE: There are W of them.
kusano 7d535a
 *
kusano 7d535a
 *   dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
kusano 7d535a
 *		   NOTE: there are W of them.
kusano 7d535a
 *
kusano 7d535a
 *   tempv[0:*]: real temporary used for dense numeric kernels;
kusano 7d535a
 *	The size of this array is defined by NUM_TEMPV() in slu_util.h.
kusano 7d535a
 *	It is also used by the dropping routine ilu_ddrop_row().
kusano 7d535a
 * 
kusano 7d535a
 */
kusano 7d535a
kusano 7d535a
void
kusano 7d535a
cgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
kusano 7d535a
	int *etree, void *work, int lwork, int *perm_c, int *perm_r,
kusano 7d535a
	SuperMatrix *L, SuperMatrix *U, SuperLUStat_t *stat, int *info)
kusano 7d535a
{
kusano 7d535a
    /* Local working arrays */
kusano 7d535a
    NCPformat *Astore;
kusano 7d535a
    int       *iperm_r = NULL; /* inverse of perm_r; used when
kusano 7d535a
				  options->Fact == SamePattern_SameRowPerm */
kusano 7d535a
    int       *iperm_c; /* inverse of perm_c */
kusano 7d535a
    int       *swap, *iswap; /* swap is used to store the row permutation
kusano 7d535a
				during the factorization. Initially, it is set
kusano 7d535a
				to iperm_c (row indeces of Pc*A*Pc').
kusano 7d535a
				iswap is the inverse of swap. After the
kusano 7d535a
				factorization, it is equal to perm_r. */
kusano 7d535a
    int       *iwork;
kusano 7d535a
    complex   *cwork;
kusano 7d535a
    int       *segrep, *repfnz, *parent, *xplore;
kusano 7d535a
    int       *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
kusano 7d535a
    int       *marker, *marker_relax;
kusano 7d535a
    complex    *dense, *tempv;
kusano 7d535a
    float *stempv;
kusano 7d535a
    int       *relax_end, *relax_fsupc;
kusano 7d535a
    complex    *a;
kusano 7d535a
    int       *asub;
kusano 7d535a
    int       *xa_begin, *xa_end;
kusano 7d535a
    int       *xsup, *supno;
kusano 7d535a
    int       *xlsub, *xlusup, *xusub;
kusano 7d535a
    int       nzlumax;
kusano 7d535a
    float    *amax; 
kusano 7d535a
    complex    drop_sum;
kusano 7d535a
    static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */
kusano 7d535a
    int       *iwork2;	   /* used by the second dropping rule */
kusano 7d535a
kusano 7d535a
    /* Local scalars */
kusano 7d535a
    fact_t    fact = options->Fact;
kusano 7d535a
    double    diag_pivot_thresh = options->DiagPivotThresh;
kusano 7d535a
    double    drop_tol = options->ILU_DropTol; /* tau */
kusano 7d535a
    double    fill_ini = options->ILU_FillTol; /* tau^hat */
kusano 7d535a
    double    gamma = options->ILU_FillFactor;
kusano 7d535a
    int       drop_rule = options->ILU_DropRule;
kusano 7d535a
    milu_t    milu = options->ILU_MILU;
kusano 7d535a
    double    fill_tol;
kusano 7d535a
    int       pivrow;	/* pivotal row number in the original matrix A */
kusano 7d535a
    int       nseg1;	/* no of segments in U-column above panel row jcol */
kusano 7d535a
    int       nseg;	/* no of segments in each U-column */
kusano 7d535a
    register int jcol;
kusano 7d535a
    register int kcol;	/* end column of a relaxed snode */
kusano 7d535a
    register int icol;
kusano 7d535a
    register int i, k, jj, new_next, iinfo;
kusano 7d535a
    int       m, n, min_mn, jsupno, fsupc, nextlu, nextu;
kusano 7d535a
    int       w_def;	/* upper bound on panel width */
kusano 7d535a
    int       usepr, iperm_r_allocated = 0;
kusano 7d535a
    int       nnzL, nnzU;
kusano 7d535a
    int       *panel_histo = stat->panel_histo;
kusano 7d535a
    flops_t   *ops = stat->ops;
kusano 7d535a
kusano 7d535a
    int       last_drop;/* the last column which the dropping rules applied */
kusano 7d535a
    int       quota;
kusano 7d535a
    int       nnzAj;	/* number of nonzeros in A(:,1:j) */
kusano 7d535a
    int       nnzLj, nnzUj;
kusano 7d535a
    double    tol_L = drop_tol, tol_U = drop_tol;
kusano 7d535a
    complex zero = {0.0, 0.0};
kusano 7d535a
kusano 7d535a
    /* Executable */	   
kusano 7d535a
    iinfo    = 0;
kusano 7d535a
    m	     = A->nrow;
kusano 7d535a
    n	     = A->ncol;
kusano 7d535a
    min_mn   = SUPERLU_MIN(m, n);
kusano 7d535a
    Astore   = A->Store;
kusano 7d535a
    a	     = Astore->nzval;
kusano 7d535a
    asub     = Astore->rowind;
kusano 7d535a
    xa_begin = Astore->colbeg;
kusano 7d535a
    xa_end   = Astore->colend;
kusano 7d535a
kusano 7d535a
    /* Allocate storage common to the factor routines */
kusano 7d535a
    *info = cLUMemInit(fact, work, lwork, m, n, Astore->nnz, panel_size,
kusano 7d535a
		       gamma, L, U, &Glu, &iwork, &cwork);
kusano 7d535a
    if ( *info ) return;
kusano 7d535a
kusano 7d535a
    xsup    = Glu.xsup;
kusano 7d535a
    supno   = Glu.supno;
kusano 7d535a
    xlsub   = Glu.xlsub;
kusano 7d535a
    xlusup  = Glu.xlusup;
kusano 7d535a
    xusub   = Glu.xusub;
kusano 7d535a
kusano 7d535a
    SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
kusano 7d535a
	     &repfnz, &panel_lsub, &marker_relax, &marker);
kusano 7d535a
    cSetRWork(m, panel_size, cwork, &dense, &tempv);
kusano 7d535a
kusano 7d535a
    usepr = (fact == SamePattern_SameRowPerm);
kusano 7d535a
    if ( usepr ) {
kusano 7d535a
	/* Compute the inverse of perm_r */
kusano 7d535a
	iperm_r = (int *) intMalloc(m);
kusano 7d535a
	for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
kusano 7d535a
	iperm_r_allocated = 1;
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    iperm_c = (int *) intMalloc(n);
kusano 7d535a
    for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
kusano 7d535a
    swap = (int *)intMalloc(n);
kusano 7d535a
    for (k = 0; k < n; k++) swap[k] = iperm_c[k];
kusano 7d535a
    iswap = (int *)intMalloc(n);
kusano 7d535a
    for (k = 0; k < n; k++) iswap[k] = perm_c[k];
kusano 7d535a
    amax = (float *) floatMalloc(panel_size);
kusano 7d535a
    if (drop_rule & DROP_SECONDARY)
kusano 7d535a
	iwork2 = (int *)intMalloc(n);
kusano 7d535a
    else
kusano 7d535a
	iwork2 = NULL;
kusano 7d535a
kusano 7d535a
    nnzAj = 0;
kusano 7d535a
    nnzLj = 0;
kusano 7d535a
    nnzUj = 0;
kusano 7d535a
    last_drop = SUPERLU_MAX(min_mn - 2 * sp_ienv(7), (int)(min_mn * 0.95));
kusano 7d535a
kusano 7d535a
    /* Identify relaxed snodes */
kusano 7d535a
    relax_end = (int *) intMalloc(n);
kusano 7d535a
    relax_fsupc = (int *) intMalloc(n);
kusano 7d535a
    if ( options->SymmetricMode == YES )
kusano 7d535a
	ilu_heap_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc);
kusano 7d535a
    else
kusano 7d535a
	ilu_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc);
kusano 7d535a
kusano 7d535a
    ifill (perm_r, m, EMPTY);
kusano 7d535a
    ifill (marker, m * NO_MARKER, EMPTY);
kusano 7d535a
    supno[0] = -1;
kusano 7d535a
    xsup[0]  = xlsub[0] = xusub[0] = xlusup[0] = 0;
kusano 7d535a
    w_def    = panel_size;
kusano 7d535a
kusano 7d535a
    /* Mark the rows used by relaxed supernodes */
kusano 7d535a
    ifill (marker_relax, m, EMPTY);
kusano 7d535a
    i = mark_relax(m, relax_end, relax_fsupc, xa_begin, xa_end,
kusano 7d535a
	         asub, marker_relax);
kusano 7d535a
#if ( PRNTlevel >= 1)
kusano 7d535a
    printf("%d relaxed supernodes.\n", i);
kusano 7d535a
#endif
kusano 7d535a
kusano 7d535a
    /*
kusano 7d535a
     * Work on one "panel" at a time. A panel is one of the following:
kusano 7d535a
     *	   (a) a relaxed supernode at the bottom of the etree, or
kusano 7d535a
     *	   (b) panel_size contiguous columns, defined by the user
kusano 7d535a
     */
kusano 7d535a
    for (jcol = 0; jcol < min_mn; ) {
kusano 7d535a
kusano 7d535a
	if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
kusano 7d535a
	    kcol = relax_end[jcol];	  /* end of the relaxed snode */
kusano 7d535a
	    panel_histo[kcol-jcol+1]++;
kusano 7d535a
kusano 7d535a
	    /* Drop small rows in the previous supernode. */
kusano 7d535a
	    if (jcol > 0 && jcol < last_drop) {
kusano 7d535a
		int first = xsup[supno[jcol - 1]];
kusano 7d535a
		int last = jcol - 1;
kusano 7d535a
		int quota;
kusano 7d535a
kusano 7d535a
		/* Compute the quota */
kusano 7d535a
		if (drop_rule & DROP_PROWS)
kusano 7d535a
		    quota = gamma * Astore->nnz / m * (m - first) / m
kusano 7d535a
			    * (last - first + 1);
kusano 7d535a
		else if (drop_rule & DROP_COLUMN) {
kusano 7d535a
		    int i;
kusano 7d535a
		    quota = 0;
kusano 7d535a
		    for (i = first; i <= last; i++)
kusano 7d535a
			quota += xa_end[i] - xa_begin[i];
kusano 7d535a
		    quota = gamma * quota * (m - first) / m;
kusano 7d535a
		} else if (drop_rule & DROP_AREA)
kusano 7d535a
		    quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
kusano 7d535a
			    - nnzLj;
kusano 7d535a
		else
kusano 7d535a
		    quota = m * n;
kusano 7d535a
		fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / min_mn);
kusano 7d535a
kusano 7d535a
		/* Drop small rows */
kusano 7d535a
                stempv = (float *) tempv;
kusano 7d535a
		i = ilu_cdrop_row(options, first, last, tol_L, quota, &nnzLj,
kusano 7d535a
				  &fill_tol, &Glu, stempv, iwork2, 0);
kusano 7d535a
		/* Reset the parameters */
kusano 7d535a
		if (drop_rule & DROP_DYNAMIC) {
kusano 7d535a
		    if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
kusano 7d535a
			     < nnzLj)
kusano 7d535a
			tol_L = SUPERLU_MIN(1.0, tol_L * 2.0);
kusano 7d535a
		    else
kusano 7d535a
			tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5);
kusano 7d535a
		}
kusano 7d535a
		if (fill_tol < 0) iinfo -= (int)fill_tol;
kusano 7d535a
#ifdef DEBUG
kusano 7d535a
		num_drop_L += i * (last - first + 1);
kusano 7d535a
#endif
kusano 7d535a
	    }
kusano 7d535a
kusano 7d535a
	    /* --------------------------------------
kusano 7d535a
	     * Factorize the relaxed supernode(jcol:kcol)
kusano 7d535a
	     * -------------------------------------- */
kusano 7d535a
	    /* Determine the union of the row structure of the snode */
kusano 7d535a
	    if ( (*info = ilu_csnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
kusano 7d535a
					 marker, &Glu)) != 0 )
kusano 7d535a
		return;
kusano 7d535a
kusano 7d535a
	    nextu    = xusub[jcol];
kusano 7d535a
	    nextlu   = xlusup[jcol];
kusano 7d535a
	    jsupno   = supno[jcol];
kusano 7d535a
	    fsupc    = xsup[jsupno];
kusano 7d535a
	    new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
kusano 7d535a
	    nzlumax = Glu.nzlumax;
kusano 7d535a
	    while ( new_next > nzlumax ) {
kusano 7d535a
		if ((*info = cLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu)))
kusano 7d535a
		    return;
kusano 7d535a
	    }
kusano 7d535a
kusano 7d535a
	    for (icol = jcol; icol <= kcol; icol++) {
kusano 7d535a
		xusub[icol+1] = nextu;
kusano 7d535a
kusano 7d535a
		amax[0] = 0.0;
kusano 7d535a
		/* Scatter into SPA dense[*] */
kusano 7d535a
		for (k = xa_begin[icol]; k < xa_end[icol]; k++) {
kusano 7d535a
                    register float tmp = c_abs1 (&a[k]);
kusano 7d535a
		    if (tmp > amax[0]) amax[0] = tmp;
kusano 7d535a
		    dense[asub[k]] = a[k];
kusano 7d535a
		}
kusano 7d535a
		nnzAj += xa_end[icol] - xa_begin[icol];
kusano 7d535a
		if (amax[0] == 0.0) {
kusano 7d535a
		    amax[0] = fill_ini;
kusano 7d535a
#if ( PRNTlevel >= 1)
kusano 7d535a
		    printf("Column %d is entirely zero!\n", icol);
kusano 7d535a
		    fflush(stdout);
kusano 7d535a
#endif
kusano 7d535a
		}
kusano 7d535a
kusano 7d535a
		/* Numeric update within the snode */
kusano 7d535a
		csnode_bmod(icol, jsupno, fsupc, dense, tempv, &Glu, stat);
kusano 7d535a
kusano 7d535a
		if (usepr) pivrow = iperm_r[icol];
kusano 7d535a
		fill_tol = pow(fill_ini, 1.0 - (double)icol / (double)min_mn);
kusano 7d535a
		if ( (*info = ilu_cpivotL(icol, diag_pivot_thresh, &usepr,
kusano 7d535a
					  perm_r, iperm_c[icol], swap, iswap,
kusano 7d535a
					  marker_relax, &pivrow,
kusano 7d535a
                                          amax[0] * fill_tol, milu, zero,
kusano 7d535a
                                          &Glu, stat)) ) {
kusano 7d535a
		    iinfo++;
kusano 7d535a
		    marker[pivrow] = kcol;
kusano 7d535a
		}
kusano 7d535a
kusano 7d535a
	    }
kusano 7d535a
kusano 7d535a
	    jcol = kcol + 1;
kusano 7d535a
kusano 7d535a
	} else { /* Work on one panel of panel_size columns */
kusano 7d535a
kusano 7d535a
	    /* Adjust panel_size so that a panel won't overlap with the next
kusano 7d535a
	     * relaxed snode.
kusano 7d535a
	     */
kusano 7d535a
	    panel_size = w_def;
kusano 7d535a
	    for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++)
kusano 7d535a
		if ( relax_end[k] != EMPTY ) {
kusano 7d535a
		    panel_size = k - jcol;
kusano 7d535a
		    break;
kusano 7d535a
		}
kusano 7d535a
	    if ( k == min_mn ) panel_size = min_mn - jcol;
kusano 7d535a
	    panel_histo[panel_size]++;
kusano 7d535a
kusano 7d535a
	    /* symbolic factor on a panel of columns */
kusano 7d535a
	    ilu_cpanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
kusano 7d535a
                          dense, amax, panel_lsub, segrep, repfnz,
kusano 7d535a
                          marker, parent, xplore, &Glu);
kusano 7d535a
kusano 7d535a
	    /* numeric sup-panel updates in topological order */
kusano 7d535a
	    cpanel_bmod(m, panel_size, jcol, nseg1, dense,
kusano 7d535a
			tempv, segrep, repfnz, &Glu, stat);
kusano 7d535a
kusano 7d535a
	    /* Sparse LU within the panel, and below panel diagonal */
kusano 7d535a
	    for (jj = jcol; jj < jcol + panel_size; jj++) {
kusano 7d535a
kusano 7d535a
		k = (jj - jcol) * m; /* column index for w-wide arrays */
kusano 7d535a
kusano 7d535a
		nseg = nseg1;	/* Begin after all the panel segments */
kusano 7d535a
kusano 7d535a
		nnzAj += xa_end[jj] - xa_begin[jj];
kusano 7d535a
kusano 7d535a
		if ((*info = ilu_ccolumn_dfs(m, jj, perm_r, &nseg,
kusano 7d535a
					     &panel_lsub[k], segrep, &repfnz[k],
kusano 7d535a
					     marker, parent, xplore, &Glu)))
kusano 7d535a
		    return;
kusano 7d535a
kusano 7d535a
		/* Numeric updates */
kusano 7d535a
		if ((*info = ccolumn_bmod(jj, (nseg - nseg1), &dense[k],
kusano 7d535a
					  tempv, &segrep[nseg1], &repfnz[k],
kusano 7d535a
					  jcol, &Glu, stat)) != 0) return;
kusano 7d535a
kusano 7d535a
		/* Make a fill-in position if the column is entirely zero */
kusano 7d535a
		if (xlsub[jj + 1] == xlsub[jj]) {
kusano 7d535a
		    register int i, row;
kusano 7d535a
		    int nextl;
kusano 7d535a
		    int nzlmax = Glu.nzlmax;
kusano 7d535a
		    int *lsub = Glu.lsub;
kusano 7d535a
		    int *marker2 = marker + 2 * m;
kusano 7d535a
kusano 7d535a
		    /* Allocate memory */
kusano 7d535a
		    nextl = xlsub[jj] + 1;
kusano 7d535a
		    if (nextl >= nzlmax) {
kusano 7d535a
			int error = cLUMemXpand(jj, nextl, LSUB, &nzlmax, &Glu);
kusano 7d535a
			if (error) { *info = error; return; }
kusano 7d535a
			lsub = Glu.lsub;
kusano 7d535a
		    }
kusano 7d535a
		    xlsub[jj + 1]++;
kusano 7d535a
		    assert(xlusup[jj]==xlusup[jj+1]);
kusano 7d535a
		    xlusup[jj + 1]++;
kusano 7d535a
		    Glu.lusup[xlusup[jj]] = zero;
kusano 7d535a
kusano 7d535a
		    /* Choose a row index (pivrow) for fill-in */
kusano 7d535a
		    for (i = jj; i < n; i++)
kusano 7d535a
			if (marker_relax[swap[i]] <= jj) break;
kusano 7d535a
		    row = swap[i];
kusano 7d535a
		    marker2[row] = jj;
kusano 7d535a
		    lsub[xlsub[jj]] = row;
kusano 7d535a
#ifdef DEBUG
kusano 7d535a
		    printf("Fill col %d.\n", jj);
kusano 7d535a
		    fflush(stdout);
kusano 7d535a
#endif
kusano 7d535a
		}
kusano 7d535a
kusano 7d535a
		/* Computer the quota */
kusano 7d535a
		if (drop_rule & DROP_PROWS)
kusano 7d535a
		    quota = gamma * Astore->nnz / m * jj / m;
kusano 7d535a
		else if (drop_rule & DROP_COLUMN)
kusano 7d535a
		    quota = gamma * (xa_end[jj] - xa_begin[jj]) *
kusano 7d535a
			    (jj + 1) / m;
kusano 7d535a
		else if (drop_rule & DROP_AREA)
kusano 7d535a
		    quota = gamma * 0.9 * nnzAj * 0.5 - nnzUj;
kusano 7d535a
		else
kusano 7d535a
		    quota = m;
kusano 7d535a
kusano 7d535a
		/* Copy the U-segments to ucol[*] and drop small entries */
kusano 7d535a
		if ((*info = ilu_ccopy_to_ucol(jj, nseg, segrep, &repfnz[k],
kusano 7d535a
					       perm_r, &dense[k], drop_rule,
kusano 7d535a
					       milu, amax[jj - jcol] * tol_U,
kusano 7d535a
					       quota, &drop_sum, &nnzUj, &Glu,
kusano 7d535a
					       iwork2)) != 0)
kusano 7d535a
		    return;
kusano 7d535a
kusano 7d535a
		/* Reset the dropping threshold if required */
kusano 7d535a
		if (drop_rule & DROP_DYNAMIC) {
kusano 7d535a
		    if (gamma * 0.9 * nnzAj * 0.5 < nnzLj)
kusano 7d535a
			tol_U = SUPERLU_MIN(1.0, tol_U * 2.0);
kusano 7d535a
		    else
kusano 7d535a
			tol_U = SUPERLU_MAX(drop_tol, tol_U * 0.5);
kusano 7d535a
		}
kusano 7d535a
kusano 7d535a
                cs_mult(&drop_sum, &drop_sum, MILU_ALPHA);
kusano 7d535a
		if (usepr) pivrow = iperm_r[jj];
kusano 7d535a
		fill_tol = pow(fill_ini, 1.0 - (double)jj / (double)min_mn);
kusano 7d535a
		if ( (*info = ilu_cpivotL(jj, diag_pivot_thresh, &usepr, perm_r,
kusano 7d535a
					  iperm_c[jj], swap, iswap,
kusano 7d535a
					  marker_relax, &pivrow,
kusano 7d535a
					  amax[jj - jcol] * fill_tol, milu,
kusano 7d535a
					  drop_sum, &Glu, stat)) ) {
kusano 7d535a
		    iinfo++;
kusano 7d535a
		    marker[m + pivrow] = jj;
kusano 7d535a
		    marker[2 * m + pivrow] = jj;
kusano 7d535a
		}
kusano 7d535a
kusano 7d535a
		/* Reset repfnz[] for this column */
kusano 7d535a
		resetrep_col (nseg, segrep, &repfnz[k]);
kusano 7d535a
kusano 7d535a
		/* Start a new supernode, drop the previous one */
kusano 7d535a
		if (jj > 0 && supno[jj] > supno[jj - 1] && jj < last_drop) {
kusano 7d535a
		    int first = xsup[supno[jj - 1]];
kusano 7d535a
		    int last = jj - 1;
kusano 7d535a
		    int quota;
kusano 7d535a
kusano 7d535a
		    /* Compute the quota */
kusano 7d535a
		    if (drop_rule & DROP_PROWS)
kusano 7d535a
			quota = gamma * Astore->nnz / m * (m - first) / m
kusano 7d535a
				* (last - first + 1);
kusano 7d535a
		    else if (drop_rule & DROP_COLUMN) {
kusano 7d535a
			int i;
kusano 7d535a
			quota = 0;
kusano 7d535a
			for (i = first; i <= last; i++)
kusano 7d535a
			    quota += xa_end[i] - xa_begin[i];
kusano 7d535a
			quota = gamma * quota * (m - first) / m;
kusano 7d535a
		    } else if (drop_rule & DROP_AREA)
kusano 7d535a
			quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0)
kusano 7d535a
				/ m) - nnzLj;
kusano 7d535a
		    else
kusano 7d535a
			quota = m * n;
kusano 7d535a
		    fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) /
kusano 7d535a
			    (double)min_mn);
kusano 7d535a
kusano 7d535a
		    /* Drop small rows */
kusano 7d535a
                    stempv = (float *) tempv;
kusano 7d535a
		    i = ilu_cdrop_row(options, first, last, tol_L, quota,
kusano 7d535a
				      &nnzLj, &fill_tol, &Glu, stempv, iwork2,
kusano 7d535a
				      1);
kusano 7d535a
kusano 7d535a
		    /* Reset the parameters */
kusano 7d535a
		    if (drop_rule & DROP_DYNAMIC) {
kusano 7d535a
			if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
kusano 7d535a
				< nnzLj)
kusano 7d535a
			    tol_L = SUPERLU_MIN(1.0, tol_L * 2.0);
kusano 7d535a
			else
kusano 7d535a
			    tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5);
kusano 7d535a
		    }
kusano 7d535a
		    if (fill_tol < 0) iinfo -= (int)fill_tol;
kusano 7d535a
#ifdef DEBUG
kusano 7d535a
		    num_drop_L += i * (last - first + 1);
kusano 7d535a
#endif
kusano 7d535a
		} /* if start a new supernode */
kusano 7d535a
kusano 7d535a
	    } /* for */
kusano 7d535a
kusano 7d535a
	    jcol += panel_size; /* Move to the next panel */
kusano 7d535a
kusano 7d535a
	} /* else */
kusano 7d535a
kusano 7d535a
    } /* for */
kusano 7d535a
kusano 7d535a
    *info = iinfo;
kusano 7d535a
kusano 7d535a
    if ( m > n ) {
kusano 7d535a
	k = 0;
kusano 7d535a
	for (i = 0; i < m; ++i)
kusano 7d535a
	    if ( perm_r[i] == EMPTY ) {
kusano 7d535a
		perm_r[i] = n + k;
kusano 7d535a
		++k;
kusano 7d535a
	    }
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    ilu_countnz(min_mn, &nnzL, &nnzU, &Glu);
kusano 7d535a
    fixupL(min_mn, perm_r, &Glu);
kusano 7d535a
kusano 7d535a
    cLUWorkFree(iwork, cwork, &Glu); /* Free work space and compress storage */
kusano 7d535a
kusano 7d535a
    if ( fact == SamePattern_SameRowPerm ) {
kusano 7d535a
	/* L and U structures may have changed due to possibly different
kusano 7d535a
	   pivoting, even though the storage is available.
kusano 7d535a
	   There could also be memory expansions, so the array locations
kusano 7d535a
	   may have changed, */
kusano 7d535a
	((SCformat *)L->Store)->nnz = nnzL;
kusano 7d535a
	((SCformat *)L->Store)->nsuper = Glu.supno[n];
kusano 7d535a
	((SCformat *)L->Store)->nzval = Glu.lusup;
kusano 7d535a
	((SCformat *)L->Store)->nzval_colptr = Glu.xlusup;
kusano 7d535a
	((SCformat *)L->Store)->rowind = Glu.lsub;
kusano 7d535a
	((SCformat *)L->Store)->rowind_colptr = Glu.xlsub;
kusano 7d535a
	((NCformat *)U->Store)->nnz = nnzU;
kusano 7d535a
	((NCformat *)U->Store)->nzval = Glu.ucol;
kusano 7d535a
	((NCformat *)U->Store)->rowind = Glu.usub;
kusano 7d535a
	((NCformat *)U->Store)->colptr = Glu.xusub;
kusano 7d535a
    } else {
kusano 7d535a
	cCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup,
kusano 7d535a
				 Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno,
kusano 7d535a
				 Glu.xsup, SLU_SC, SLU_C, SLU_TRLU);
kusano 7d535a
	cCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol,
kusano 7d535a
			       Glu.usub, Glu.xusub, SLU_NC, SLU_C, SLU_TRU);
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    ops[FACT] += ops[TRSV] + ops[GEMV];
kusano 7d535a
    stat->expansions = --(Glu.num_expansions);
kusano 7d535a
kusano 7d535a
    if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
kusano 7d535a
    SUPERLU_FREE (iperm_c);
kusano 7d535a
    SUPERLU_FREE (relax_end);
kusano 7d535a
    SUPERLU_FREE (swap);
kusano 7d535a
    SUPERLU_FREE (iswap);
kusano 7d535a
    SUPERLU_FREE (relax_fsupc);
kusano 7d535a
    SUPERLU_FREE (amax);
kusano 7d535a
    if ( iwork2 ) SUPERLU_FREE (iwork2);
kusano 7d535a
kusano 7d535a
}