|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! @file sgsitf.c
|
|
kusano |
7d535a |
* \brief Computes an ILU factorization of a general sparse matrix
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* -- SuperLU routine (version 4.1) --
|
|
kusano |
7d535a |
* Lawrence Berkeley National Laboratory.
|
|
kusano |
7d535a |
* June 30, 2009
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
#include "slu_sdefs.h"
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
#ifdef DEBUG
|
|
kusano |
7d535a |
int num_drop_L;
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! \brief
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* Purpose
|
|
kusano |
7d535a |
* =======
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* SGSITRF computes an ILU factorization of a general sparse m-by-n
|
|
kusano |
7d535a |
* matrix A using partial pivoting with row interchanges.
|
|
kusano |
7d535a |
* The factorization has the form
|
|
kusano |
7d535a |
* Pr * A = L * U
|
|
kusano |
7d535a |
* where Pr is a row permutation matrix, L is lower triangular with unit
|
|
kusano |
7d535a |
* diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper
|
|
kusano |
7d535a |
* triangular (upper trapezoidal if A->nrow < A->ncol).
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* See supermatrix.h for the definition of 'SuperMatrix' structure.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* Arguments
|
|
kusano |
7d535a |
* =========
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* options (input) superlu_options_t*
|
|
kusano |
7d535a |
* The structure defines the input parameters to control
|
|
kusano |
7d535a |
* how the ILU decomposition will be performed.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* A (input) SuperMatrix*
|
|
kusano |
7d535a |
* Original matrix A, permuted by columns, of dimension
|
|
kusano |
7d535a |
* (A->nrow, A->ncol). The type of A can be:
|
|
kusano |
7d535a |
* Stype = SLU_NCP; Dtype = SLU_S; Mtype = SLU_GE.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* relax (input) int
|
|
kusano |
7d535a |
* To control degree of relaxing supernodes. If the number
|
|
kusano |
7d535a |
* of nodes (columns) in a subtree of the elimination tree is less
|
|
kusano |
7d535a |
* than relax, this subtree is considered as one supernode,
|
|
kusano |
7d535a |
* regardless of the row structures of those columns.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* panel_size (input) int
|
|
kusano |
7d535a |
* A panel consists of at most panel_size consecutive columns.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* etree (input) int*, dimension (A->ncol)
|
|
kusano |
7d535a |
* Elimination tree of A'*A.
|
|
kusano |
7d535a |
* Note: etree is a vector of parent pointers for a forest whose
|
|
kusano |
7d535a |
* vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
|
|
kusano |
7d535a |
* On input, the columns of A should be permuted so that the
|
|
kusano |
7d535a |
* etree is in a certain postorder.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* work (input/output) void*, size (lwork) (in bytes)
|
|
kusano |
7d535a |
* User-supplied work space and space for the output data structures.
|
|
kusano |
7d535a |
* Not referenced if lwork = 0;
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* lwork (input) int
|
|
kusano |
7d535a |
* Specifies the size of work array in bytes.
|
|
kusano |
7d535a |
* = 0: allocate space internally by system malloc;
|
|
kusano |
7d535a |
* > 0: use user-supplied work array of length lwork in bytes,
|
|
kusano |
7d535a |
* returns error if space runs out.
|
|
kusano |
7d535a |
* = -1: the routine guesses the amount of space needed without
|
|
kusano |
7d535a |
* performing the factorization, and returns it in
|
|
kusano |
7d535a |
* *info; no other side effects.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* perm_c (input) int*, dimension (A->ncol)
|
|
kusano |
7d535a |
* Column permutation vector, which defines the
|
|
kusano |
7d535a |
* permutation matrix Pc; perm_c[i] = j means column i of A is
|
|
kusano |
7d535a |
* in position j in A*Pc.
|
|
kusano |
7d535a |
* When searching for diagonal, perm_c[*] is applied to the
|
|
kusano |
7d535a |
* row subscripts of A, so that diagonal threshold pivoting
|
|
kusano |
7d535a |
* can find the diagonal of A, rather than that of A*Pc.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* perm_r (input/output) int*, dimension (A->nrow)
|
|
kusano |
7d535a |
* Row permutation vector which defines the permutation matrix Pr,
|
|
kusano |
7d535a |
* perm_r[i] = j means row i of A is in position j in Pr*A.
|
|
kusano |
7d535a |
* If options->Fact = SamePattern_SameRowPerm, the pivoting routine
|
|
kusano |
7d535a |
* will try to use the input perm_r, unless a certain threshold
|
|
kusano |
7d535a |
* criterion is violated. In that case, perm_r is overwritten by
|
|
kusano |
7d535a |
* a new permutation determined by partial pivoting or diagonal
|
|
kusano |
7d535a |
* threshold pivoting.
|
|
kusano |
7d535a |
* Otherwise, perm_r is output argument;
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* L (output) SuperMatrix*
|
|
kusano |
7d535a |
* The factor L from the factorization Pr*A=L*U; use compressed row
|
|
kusano |
7d535a |
* subscripts storage for supernodes, i.e., L has type:
|
|
kusano |
7d535a |
* Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* U (output) SuperMatrix*
|
|
kusano |
7d535a |
* The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
|
|
kusano |
7d535a |
* storage scheme, i.e., U has types: Stype = SLU_NC,
|
|
kusano |
7d535a |
* Dtype = SLU_S, Mtype = SLU_TRU.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* stat (output) SuperLUStat_t*
|
|
kusano |
7d535a |
* Record the statistics on runtime and floating-point operation count.
|
|
kusano |
7d535a |
* See slu_util.h for the definition of 'SuperLUStat_t'.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* info (output) int*
|
|
kusano |
7d535a |
* = 0: successful exit
|
|
kusano |
7d535a |
* < 0: if info = -i, the i-th argument had an illegal value
|
|
kusano |
7d535a |
* > 0: if info = i, and i is
|
|
kusano |
7d535a |
* <= A->ncol: number of zero pivots. They are replaced by small
|
|
kusano |
7d535a |
* entries according to options->ILU_FillTol.
|
|
kusano |
7d535a |
* > A->ncol: number of bytes allocated when memory allocation
|
|
kusano |
7d535a |
* failure occurred, plus A->ncol. If lwork = -1, it is
|
|
kusano |
7d535a |
* the estimated amount of space needed, plus A->ncol.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* ======================================================================
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* Local Working Arrays:
|
|
kusano |
7d535a |
* ======================
|
|
kusano |
7d535a |
* m = number of rows in the matrix
|
|
kusano |
7d535a |
* n = number of columns in the matrix
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* marker[0:3*m-1]: marker[i] = j means that node i has been
|
|
kusano |
7d535a |
* reached when working on column j.
|
|
kusano |
7d535a |
* Storage: relative to original row subscripts
|
|
kusano |
7d535a |
* NOTE: There are 4 of them:
|
|
kusano |
7d535a |
* marker/marker1 are used for panel dfs, see (ilu_)dpanel_dfs.c;
|
|
kusano |
7d535a |
* marker2 is used for inner-factorization, see (ilu)_dcolumn_dfs.c;
|
|
kusano |
7d535a |
* marker_relax(has its own space) is used for relaxed supernodes.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* parent[0:m-1]: parent vector used during dfs
|
|
kusano |
7d535a |
* Storage: relative to new row subscripts
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* xplore[0:m-1]: xplore[i] gives the location of the next (dfs)
|
|
kusano |
7d535a |
* unexplored neighbor of i in lsub[*]
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* segrep[0:nseg-1]: contains the list of supernodal representatives
|
|
kusano |
7d535a |
* in topological order of the dfs. A supernode representative is the
|
|
kusano |
7d535a |
* last column of a supernode.
|
|
kusano |
7d535a |
* The maximum size of segrep[] is n.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a
|
|
kusano |
7d535a |
* supernodal representative r, repfnz[r] is the location of the first
|
|
kusano |
7d535a |
* nonzero in this segment. It is also used during the dfs: repfnz[r]>0
|
|
kusano |
7d535a |
* indicates the supernode r has been explored.
|
|
kusano |
7d535a |
* NOTE: There are W of them, each used for one column of a panel.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below
|
|
kusano |
7d535a |
* the panel diagonal. These are filled in during dpanel_dfs(), and are
|
|
kusano |
7d535a |
* used later in the inner LU factorization within the panel.
|
|
kusano |
7d535a |
* panel_lsub[]/dense[] pair forms the SPA data structure.
|
|
kusano |
7d535a |
* NOTE: There are W of them.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
|
|
kusano |
7d535a |
* NOTE: there are W of them.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* tempv[0:*]: real temporary used for dense numeric kernels;
|
|
kusano |
7d535a |
* The size of this array is defined by NUM_TEMPV() in slu_util.h.
|
|
kusano |
7d535a |
* It is also used by the dropping routine ilu_ddrop_row().
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
void
|
|
kusano |
7d535a |
sgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
|
|
kusano |
7d535a |
int *etree, void *work, int lwork, int *perm_c, int *perm_r,
|
|
kusano |
7d535a |
SuperMatrix *L, SuperMatrix *U, SuperLUStat_t *stat, int *info)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
/* Local working arrays */
|
|
kusano |
7d535a |
NCPformat *Astore;
|
|
kusano |
7d535a |
int *iperm_r = NULL; /* inverse of perm_r; used when
|
|
kusano |
7d535a |
options->Fact == SamePattern_SameRowPerm */
|
|
kusano |
7d535a |
int *iperm_c; /* inverse of perm_c */
|
|
kusano |
7d535a |
int *swap, *iswap; /* swap is used to store the row permutation
|
|
kusano |
7d535a |
during the factorization. Initially, it is set
|
|
kusano |
7d535a |
to iperm_c (row indeces of Pc*A*Pc').
|
|
kusano |
7d535a |
iswap is the inverse of swap. After the
|
|
kusano |
7d535a |
factorization, it is equal to perm_r. */
|
|
kusano |
7d535a |
int *iwork;
|
|
kusano |
7d535a |
float *swork;
|
|
kusano |
7d535a |
int *segrep, *repfnz, *parent, *xplore;
|
|
kusano |
7d535a |
int *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
|
|
kusano |
7d535a |
int *marker, *marker_relax;
|
|
kusano |
7d535a |
float *dense, *tempv;
|
|
kusano |
7d535a |
int *relax_end, *relax_fsupc;
|
|
kusano |
7d535a |
float *a;
|
|
kusano |
7d535a |
int *asub;
|
|
kusano |
7d535a |
int *xa_begin, *xa_end;
|
|
kusano |
7d535a |
int *xsup, *supno;
|
|
kusano |
7d535a |
int *xlsub, *xlusup, *xusub;
|
|
kusano |
7d535a |
int nzlumax;
|
|
kusano |
7d535a |
float *amax;
|
|
kusano |
7d535a |
float drop_sum;
|
|
kusano |
7d535a |
float alpha, omega; /* used in MILU, mimicing DRIC */
|
|
kusano |
7d535a |
static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */
|
|
kusano |
7d535a |
float *swork2; /* used by the second dropping rule */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Local scalars */
|
|
kusano |
7d535a |
fact_t fact = options->Fact;
|
|
kusano |
7d535a |
double diag_pivot_thresh = options->DiagPivotThresh;
|
|
kusano |
7d535a |
double drop_tol = options->ILU_DropTol; /* tau */
|
|
kusano |
7d535a |
double fill_ini = options->ILU_FillTol; /* tau^hat */
|
|
kusano |
7d535a |
double gamma = options->ILU_FillFactor;
|
|
kusano |
7d535a |
int drop_rule = options->ILU_DropRule;
|
|
kusano |
7d535a |
milu_t milu = options->ILU_MILU;
|
|
kusano |
7d535a |
double fill_tol;
|
|
kusano |
7d535a |
int pivrow; /* pivotal row number in the original matrix A */
|
|
kusano |
7d535a |
int nseg1; /* no of segments in U-column above panel row jcol */
|
|
kusano |
7d535a |
int nseg; /* no of segments in each U-column */
|
|
kusano |
7d535a |
register int jcol;
|
|
kusano |
7d535a |
register int kcol; /* end column of a relaxed snode */
|
|
kusano |
7d535a |
register int icol;
|
|
kusano |
7d535a |
register int i, k, jj, new_next, iinfo;
|
|
kusano |
7d535a |
int m, n, min_mn, jsupno, fsupc, nextlu, nextu;
|
|
kusano |
7d535a |
int w_def; /* upper bound on panel width */
|
|
kusano |
7d535a |
int usepr, iperm_r_allocated = 0;
|
|
kusano |
7d535a |
int nnzL, nnzU;
|
|
kusano |
7d535a |
int *panel_histo = stat->panel_histo;
|
|
kusano |
7d535a |
flops_t *ops = stat->ops;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
int last_drop;/* the last column which the dropping rules applied */
|
|
kusano |
7d535a |
int quota;
|
|
kusano |
7d535a |
int nnzAj; /* number of nonzeros in A(:,1:j) */
|
|
kusano |
7d535a |
int nnzLj, nnzUj;
|
|
kusano |
7d535a |
double tol_L = drop_tol, tol_U = drop_tol;
|
|
kusano |
7d535a |
float zero = 0.0;
|
|
kusano |
7d535a |
float one = 1.0;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Executable */
|
|
kusano |
7d535a |
iinfo = 0;
|
|
kusano |
7d535a |
m = A->nrow;
|
|
kusano |
7d535a |
n = A->ncol;
|
|
kusano |
7d535a |
min_mn = SUPERLU_MIN(m, n);
|
|
kusano |
7d535a |
Astore = A->Store;
|
|
kusano |
7d535a |
a = Astore->nzval;
|
|
kusano |
7d535a |
asub = Astore->rowind;
|
|
kusano |
7d535a |
xa_begin = Astore->colbeg;
|
|
kusano |
7d535a |
xa_end = Astore->colend;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Allocate storage common to the factor routines */
|
|
kusano |
7d535a |
*info = sLUMemInit(fact, work, lwork, m, n, Astore->nnz, panel_size,
|
|
kusano |
7d535a |
gamma, L, U, &Glu, &iwork, &swork);
|
|
kusano |
7d535a |
if ( *info ) return;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
xsup = Glu.xsup;
|
|
kusano |
7d535a |
supno = Glu.supno;
|
|
kusano |
7d535a |
xlsub = Glu.xlsub;
|
|
kusano |
7d535a |
xlusup = Glu.xlusup;
|
|
kusano |
7d535a |
xusub = Glu.xusub;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
|
|
kusano |
7d535a |
&repfnz, &panel_lsub, &marker_relax, &marker);
|
|
kusano |
7d535a |
sSetRWork(m, panel_size, swork, &dense, &tempv);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
usepr = (fact == SamePattern_SameRowPerm);
|
|
kusano |
7d535a |
if ( usepr ) {
|
|
kusano |
7d535a |
/* Compute the inverse of perm_r */
|
|
kusano |
7d535a |
iperm_r = (int *) intMalloc(m);
|
|
kusano |
7d535a |
for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
|
|
kusano |
7d535a |
iperm_r_allocated = 1;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
iperm_c = (int *) intMalloc(n);
|
|
kusano |
7d535a |
for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
|
|
kusano |
7d535a |
swap = (int *)intMalloc(n);
|
|
kusano |
7d535a |
for (k = 0; k < n; k++) swap[k] = iperm_c[k];
|
|
kusano |
7d535a |
iswap = (int *)intMalloc(n);
|
|
kusano |
7d535a |
for (k = 0; k < n; k++) iswap[k] = perm_c[k];
|
|
kusano |
7d535a |
amax = (float *) floatMalloc(panel_size);
|
|
kusano |
7d535a |
if (drop_rule & DROP_SECONDARY)
|
|
kusano |
7d535a |
swork2 = (float *)floatMalloc(n);
|
|
kusano |
7d535a |
else
|
|
kusano |
7d535a |
swork2 = NULL;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
nnzAj = 0;
|
|
kusano |
7d535a |
nnzLj = 0;
|
|
kusano |
7d535a |
nnzUj = 0;
|
|
kusano |
7d535a |
last_drop = SUPERLU_MAX(min_mn - 2 * sp_ienv(7), (int)(min_mn * 0.95));
|
|
kusano |
7d535a |
alpha = pow((double)n, -1.0 / options->ILU_MILU_Dim);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Identify relaxed snodes */
|
|
kusano |
7d535a |
relax_end = (int *) intMalloc(n);
|
|
kusano |
7d535a |
relax_fsupc = (int *) intMalloc(n);
|
|
kusano |
7d535a |
if ( options->SymmetricMode == YES )
|
|
kusano |
7d535a |
ilu_heap_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc);
|
|
kusano |
7d535a |
else
|
|
kusano |
7d535a |
ilu_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
ifill (perm_r, m, EMPTY);
|
|
kusano |
7d535a |
ifill (marker, m * NO_MARKER, EMPTY);
|
|
kusano |
7d535a |
supno[0] = -1;
|
|
kusano |
7d535a |
xsup[0] = xlsub[0] = xusub[0] = xlusup[0] = 0;
|
|
kusano |
7d535a |
w_def = panel_size;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Mark the rows used by relaxed supernodes */
|
|
kusano |
7d535a |
ifill (marker_relax, m, EMPTY);
|
|
kusano |
7d535a |
i = mark_relax(m, relax_end, relax_fsupc, xa_begin, xa_end,
|
|
kusano |
7d535a |
asub, marker_relax);
|
|
kusano |
7d535a |
#if ( PRNTlevel >= 1)
|
|
kusano |
7d535a |
printf("%d relaxed supernodes.\n", i);
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*
|
|
kusano |
7d535a |
* Work on one "panel" at a time. A panel is one of the following:
|
|
kusano |
7d535a |
* (a) a relaxed supernode at the bottom of the etree, or
|
|
kusano |
7d535a |
* (b) panel_size contiguous columns, defined by the user
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
for (jcol = 0; jcol < min_mn; ) {
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
|
|
kusano |
7d535a |
kcol = relax_end[jcol]; /* end of the relaxed snode */
|
|
kusano |
7d535a |
panel_histo[kcol-jcol+1]++;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Drop small rows in the previous supernode. */
|
|
kusano |
7d535a |
if (jcol > 0 && jcol < last_drop) {
|
|
kusano |
7d535a |
int first = xsup[supno[jcol - 1]];
|
|
kusano |
7d535a |
int last = jcol - 1;
|
|
kusano |
7d535a |
int quota;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Compute the quota */
|
|
kusano |
7d535a |
if (drop_rule & DROP_PROWS)
|
|
kusano |
7d535a |
quota = gamma * Astore->nnz / m * (m - first) / m
|
|
kusano |
7d535a |
* (last - first + 1);
|
|
kusano |
7d535a |
else if (drop_rule & DROP_COLUMN) {
|
|
kusano |
7d535a |
int i;
|
|
kusano |
7d535a |
quota = 0;
|
|
kusano |
7d535a |
for (i = first; i <= last; i++)
|
|
kusano |
7d535a |
quota += xa_end[i] - xa_begin[i];
|
|
kusano |
7d535a |
quota = gamma * quota * (m - first) / m;
|
|
kusano |
7d535a |
} else if (drop_rule & DROP_AREA)
|
|
kusano |
7d535a |
quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
|
|
kusano |
7d535a |
- nnzLj;
|
|
kusano |
7d535a |
else
|
|
kusano |
7d535a |
quota = m * n;
|
|
kusano |
7d535a |
fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / min_mn);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Drop small rows */
|
|
kusano |
7d535a |
i = ilu_sdrop_row(options, first, last, tol_L, quota, &nnzLj,
|
|
kusano |
7d535a |
&fill_tol, &Glu, tempv, swork2, 0);
|
|
kusano |
7d535a |
/* Reset the parameters */
|
|
kusano |
7d535a |
if (drop_rule & DROP_DYNAMIC) {
|
|
kusano |
7d535a |
if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
|
|
kusano |
7d535a |
< nnzLj)
|
|
kusano |
7d535a |
tol_L = SUPERLU_MIN(1.0, tol_L * 2.0);
|
|
kusano |
7d535a |
else
|
|
kusano |
7d535a |
tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
if (fill_tol < 0) iinfo -= (int)fill_tol;
|
|
kusano |
7d535a |
#ifdef DEBUG
|
|
kusano |
7d535a |
num_drop_L += i * (last - first + 1);
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* --------------------------------------
|
|
kusano |
7d535a |
* Factorize the relaxed supernode(jcol:kcol)
|
|
kusano |
7d535a |
* -------------------------------------- */
|
|
kusano |
7d535a |
/* Determine the union of the row structure of the snode */
|
|
kusano |
7d535a |
if ( (*info = ilu_ssnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
|
|
kusano |
7d535a |
marker, &Glu)) != 0 )
|
|
kusano |
7d535a |
return;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
nextu = xusub[jcol];
|
|
kusano |
7d535a |
nextlu = xlusup[jcol];
|
|
kusano |
7d535a |
jsupno = supno[jcol];
|
|
kusano |
7d535a |
fsupc = xsup[jsupno];
|
|
kusano |
7d535a |
new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
|
|
kusano |
7d535a |
nzlumax = Glu.nzlumax;
|
|
kusano |
7d535a |
while ( new_next > nzlumax ) {
|
|
kusano |
7d535a |
if ((*info = sLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu)))
|
|
kusano |
7d535a |
return;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
for (icol = jcol; icol <= kcol; icol++) {
|
|
kusano |
7d535a |
xusub[icol+1] = nextu;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
amax[0] = 0.0;
|
|
kusano |
7d535a |
/* Scatter into SPA dense[*] */
|
|
kusano |
7d535a |
for (k = xa_begin[icol]; k < xa_end[icol]; k++) {
|
|
kusano |
7d535a |
register float tmp = fabs(a[k]);
|
|
kusano |
7d535a |
if (tmp > amax[0]) amax[0] = tmp;
|
|
kusano |
7d535a |
dense[asub[k]] = a[k];
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
nnzAj += xa_end[icol] - xa_begin[icol];
|
|
kusano |
7d535a |
if (amax[0] == 0.0) {
|
|
kusano |
7d535a |
amax[0] = fill_ini;
|
|
kusano |
7d535a |
#if ( PRNTlevel >= 1)
|
|
kusano |
7d535a |
printf("Column %d is entirely zero!\n", icol);
|
|
kusano |
7d535a |
fflush(stdout);
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Numeric update within the snode */
|
|
kusano |
7d535a |
ssnode_bmod(icol, jsupno, fsupc, dense, tempv, &Glu, stat);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if (usepr) pivrow = iperm_r[icol];
|
|
kusano |
7d535a |
fill_tol = pow(fill_ini, 1.0 - (double)icol / (double)min_mn);
|
|
kusano |
7d535a |
if ( (*info = ilu_spivotL(icol, diag_pivot_thresh, &usepr,
|
|
kusano |
7d535a |
perm_r, iperm_c[icol], swap, iswap,
|
|
kusano |
7d535a |
marker_relax, &pivrow,
|
|
kusano |
7d535a |
amax[0] * fill_tol, milu, zero,
|
|
kusano |
7d535a |
&Glu, stat)) ) {
|
|
kusano |
7d535a |
iinfo++;
|
|
kusano |
7d535a |
marker[pivrow] = kcol;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
jcol = kcol + 1;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
} else { /* Work on one panel of panel_size columns */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Adjust panel_size so that a panel won't overlap with the next
|
|
kusano |
7d535a |
* relaxed snode.
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
panel_size = w_def;
|
|
kusano |
7d535a |
for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++)
|
|
kusano |
7d535a |
if ( relax_end[k] != EMPTY ) {
|
|
kusano |
7d535a |
panel_size = k - jcol;
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
if ( k == min_mn ) panel_size = min_mn - jcol;
|
|
kusano |
7d535a |
panel_histo[panel_size]++;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* symbolic factor on a panel of columns */
|
|
kusano |
7d535a |
ilu_spanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
|
|
kusano |
7d535a |
dense, amax, panel_lsub, segrep, repfnz,
|
|
kusano |
7d535a |
marker, parent, xplore, &Glu);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* numeric sup-panel updates in topological order */
|
|
kusano |
7d535a |
spanel_bmod(m, panel_size, jcol, nseg1, dense,
|
|
kusano |
7d535a |
tempv, segrep, repfnz, &Glu, stat);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Sparse LU within the panel, and below panel diagonal */
|
|
kusano |
7d535a |
for (jj = jcol; jj < jcol + panel_size; jj++) {
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
k = (jj - jcol) * m; /* column index for w-wide arrays */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
nseg = nseg1; /* Begin after all the panel segments */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
nnzAj += xa_end[jj] - xa_begin[jj];
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ((*info = ilu_scolumn_dfs(m, jj, perm_r, &nseg,
|
|
kusano |
7d535a |
&panel_lsub[k], segrep, &repfnz[k],
|
|
kusano |
7d535a |
marker, parent, xplore, &Glu)))
|
|
kusano |
7d535a |
return;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Numeric updates */
|
|
kusano |
7d535a |
if ((*info = scolumn_bmod(jj, (nseg - nseg1), &dense[k],
|
|
kusano |
7d535a |
tempv, &segrep[nseg1], &repfnz[k],
|
|
kusano |
7d535a |
jcol, &Glu, stat)) != 0) return;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Make a fill-in position if the column is entirely zero */
|
|
kusano |
7d535a |
if (xlsub[jj + 1] == xlsub[jj]) {
|
|
kusano |
7d535a |
register int i, row;
|
|
kusano |
7d535a |
int nextl;
|
|
kusano |
7d535a |
int nzlmax = Glu.nzlmax;
|
|
kusano |
7d535a |
int *lsub = Glu.lsub;
|
|
kusano |
7d535a |
int *marker2 = marker + 2 * m;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Allocate memory */
|
|
kusano |
7d535a |
nextl = xlsub[jj] + 1;
|
|
kusano |
7d535a |
if (nextl >= nzlmax) {
|
|
kusano |
7d535a |
int error = sLUMemXpand(jj, nextl, LSUB, &nzlmax, &Glu);
|
|
kusano |
7d535a |
if (error) { *info = error; return; }
|
|
kusano |
7d535a |
lsub = Glu.lsub;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
xlsub[jj + 1]++;
|
|
kusano |
7d535a |
assert(xlusup[jj]==xlusup[jj+1]);
|
|
kusano |
7d535a |
xlusup[jj + 1]++;
|
|
kusano |
7d535a |
Glu.lusup[xlusup[jj]] = zero;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Choose a row index (pivrow) for fill-in */
|
|
kusano |
7d535a |
for (i = jj; i < n; i++)
|
|
kusano |
7d535a |
if (marker_relax[swap[i]] <= jj) break;
|
|
kusano |
7d535a |
row = swap[i];
|
|
kusano |
7d535a |
marker2[row] = jj;
|
|
kusano |
7d535a |
lsub[xlsub[jj]] = row;
|
|
kusano |
7d535a |
#ifdef DEBUG
|
|
kusano |
7d535a |
printf("Fill col %d.\n", jj);
|
|
kusano |
7d535a |
fflush(stdout);
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Computer the quota */
|
|
kusano |
7d535a |
if (drop_rule & DROP_PROWS)
|
|
kusano |
7d535a |
quota = gamma * Astore->nnz / m * jj / m;
|
|
kusano |
7d535a |
else if (drop_rule & DROP_COLUMN)
|
|
kusano |
7d535a |
quota = gamma * (xa_end[jj] - xa_begin[jj]) *
|
|
kusano |
7d535a |
(jj + 1) / m;
|
|
kusano |
7d535a |
else if (drop_rule & DROP_AREA)
|
|
kusano |
7d535a |
quota = gamma * 0.9 * nnzAj * 0.5 - nnzUj;
|
|
kusano |
7d535a |
else
|
|
kusano |
7d535a |
quota = m;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Copy the U-segments to ucol[*] and drop small entries */
|
|
kusano |
7d535a |
if ((*info = ilu_scopy_to_ucol(jj, nseg, segrep, &repfnz[k],
|
|
kusano |
7d535a |
perm_r, &dense[k], drop_rule,
|
|
kusano |
7d535a |
milu, amax[jj - jcol] * tol_U,
|
|
kusano |
7d535a |
quota, &drop_sum, &nnzUj, &Glu,
|
|
kusano |
7d535a |
swork2)) != 0)
|
|
kusano |
7d535a |
return;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Reset the dropping threshold if required */
|
|
kusano |
7d535a |
if (drop_rule & DROP_DYNAMIC) {
|
|
kusano |
7d535a |
if (gamma * 0.9 * nnzAj * 0.5 < nnzLj)
|
|
kusano |
7d535a |
tol_U = SUPERLU_MIN(1.0, tol_U * 2.0);
|
|
kusano |
7d535a |
else
|
|
kusano |
7d535a |
tol_U = SUPERLU_MAX(drop_tol, tol_U * 0.5);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if (drop_sum != zero)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
if (drop_sum > zero)
|
|
kusano |
7d535a |
omega = SUPERLU_MIN(2.0 * (1.0 - alpha)
|
|
kusano |
7d535a |
* amax[jj - jcol] / drop_sum, one);
|
|
kusano |
7d535a |
else
|
|
kusano |
7d535a |
omega = SUPERLU_MAX(2.0 * (1.0 - alpha)
|
|
kusano |
7d535a |
* amax[jj - jcol] / drop_sum, -one);
|
|
kusano |
7d535a |
drop_sum *= omega;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
if (usepr) pivrow = iperm_r[jj];
|
|
kusano |
7d535a |
fill_tol = pow(fill_ini, 1.0 - (double)jj / (double)min_mn);
|
|
kusano |
7d535a |
if ( (*info = ilu_spivotL(jj, diag_pivot_thresh, &usepr, perm_r,
|
|
kusano |
7d535a |
iperm_c[jj], swap, iswap,
|
|
kusano |
7d535a |
marker_relax, &pivrow,
|
|
kusano |
7d535a |
amax[jj - jcol] * fill_tol, milu,
|
|
kusano |
7d535a |
drop_sum, &Glu, stat)) ) {
|
|
kusano |
7d535a |
iinfo++;
|
|
kusano |
7d535a |
marker[m + pivrow] = jj;
|
|
kusano |
7d535a |
marker[2 * m + pivrow] = jj;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Reset repfnz[] for this column */
|
|
kusano |
7d535a |
resetrep_col (nseg, segrep, &repfnz[k]);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Start a new supernode, drop the previous one */
|
|
kusano |
7d535a |
if (jj > 0 && supno[jj] > supno[jj - 1] && jj < last_drop) {
|
|
kusano |
7d535a |
int first = xsup[supno[jj - 1]];
|
|
kusano |
7d535a |
int last = jj - 1;
|
|
kusano |
7d535a |
int quota;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Compute the quota */
|
|
kusano |
7d535a |
if (drop_rule & DROP_PROWS)
|
|
kusano |
7d535a |
quota = gamma * Astore->nnz / m * (m - first) / m
|
|
kusano |
7d535a |
* (last - first + 1);
|
|
kusano |
7d535a |
else if (drop_rule & DROP_COLUMN) {
|
|
kusano |
7d535a |
int i;
|
|
kusano |
7d535a |
quota = 0;
|
|
kusano |
7d535a |
for (i = first; i <= last; i++)
|
|
kusano |
7d535a |
quota += xa_end[i] - xa_begin[i];
|
|
kusano |
7d535a |
quota = gamma * quota * (m - first) / m;
|
|
kusano |
7d535a |
} else if (drop_rule & DROP_AREA)
|
|
kusano |
7d535a |
quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0)
|
|
kusano |
7d535a |
/ m) - nnzLj;
|
|
kusano |
7d535a |
else
|
|
kusano |
7d535a |
quota = m * n;
|
|
kusano |
7d535a |
fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) /
|
|
kusano |
7d535a |
(double)min_mn);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Drop small rows */
|
|
kusano |
7d535a |
i = ilu_sdrop_row(options, first, last, tol_L, quota,
|
|
kusano |
7d535a |
&nnzLj, &fill_tol, &Glu, tempv, swork2,
|
|
kusano |
7d535a |
1);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Reset the parameters */
|
|
kusano |
7d535a |
if (drop_rule & DROP_DYNAMIC) {
|
|
kusano |
7d535a |
if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
|
|
kusano |
7d535a |
< nnzLj)
|
|
kusano |
7d535a |
tol_L = SUPERLU_MIN(1.0, tol_L * 2.0);
|
|
kusano |
7d535a |
else
|
|
kusano |
7d535a |
tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
if (fill_tol < 0) iinfo -= (int)fill_tol;
|
|
kusano |
7d535a |
#ifdef DEBUG
|
|
kusano |
7d535a |
num_drop_L += i * (last - first + 1);
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
} /* if start a new supernode */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
} /* for */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
jcol += panel_size; /* Move to the next panel */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
} /* else */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
} /* for */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
*info = iinfo;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( m > n ) {
|
|
kusano |
7d535a |
k = 0;
|
|
kusano |
7d535a |
for (i = 0; i < m; ++i)
|
|
kusano |
7d535a |
if ( perm_r[i] == EMPTY ) {
|
|
kusano |
7d535a |
perm_r[i] = n + k;
|
|
kusano |
7d535a |
++k;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
ilu_countnz(min_mn, &nnzL, &nnzU, &Glu);
|
|
kusano |
7d535a |
fixupL(min_mn, perm_r, &Glu);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
sLUWorkFree(iwork, swork, &Glu); /* Free work space and compress storage */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( fact == SamePattern_SameRowPerm ) {
|
|
kusano |
7d535a |
/* L and U structures may have changed due to possibly different
|
|
kusano |
7d535a |
pivoting, even though the storage is available.
|
|
kusano |
7d535a |
There could also be memory expansions, so the array locations
|
|
kusano |
7d535a |
may have changed, */
|
|
kusano |
7d535a |
((SCformat *)L->Store)->nnz = nnzL;
|
|
kusano |
7d535a |
((SCformat *)L->Store)->nsuper = Glu.supno[n];
|
|
kusano |
7d535a |
((SCformat *)L->Store)->nzval = Glu.lusup;
|
|
kusano |
7d535a |
((SCformat *)L->Store)->nzval_colptr = Glu.xlusup;
|
|
kusano |
7d535a |
((SCformat *)L->Store)->rowind = Glu.lsub;
|
|
kusano |
7d535a |
((SCformat *)L->Store)->rowind_colptr = Glu.xlsub;
|
|
kusano |
7d535a |
((NCformat *)U->Store)->nnz = nnzU;
|
|
kusano |
7d535a |
((NCformat *)U->Store)->nzval = Glu.ucol;
|
|
kusano |
7d535a |
((NCformat *)U->Store)->rowind = Glu.usub;
|
|
kusano |
7d535a |
((NCformat *)U->Store)->colptr = Glu.xusub;
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
sCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup,
|
|
kusano |
7d535a |
Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno,
|
|
kusano |
7d535a |
Glu.xsup, SLU_SC, SLU_S, SLU_TRLU);
|
|
kusano |
7d535a |
sCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol,
|
|
kusano |
7d535a |
Glu.usub, Glu.xusub, SLU_NC, SLU_S, SLU_TRU);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
ops[FACT] += ops[TRSV] + ops[GEMV];
|
|
kusano |
7d535a |
stat->expansions = --(Glu.num_expansions);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
|
|
kusano |
7d535a |
SUPERLU_FREE (iperm_c);
|
|
kusano |
7d535a |
SUPERLU_FREE (relax_end);
|
|
kusano |
7d535a |
SUPERLU_FREE (swap);
|
|
kusano |
7d535a |
SUPERLU_FREE (iswap);
|
|
kusano |
7d535a |
SUPERLU_FREE (relax_fsupc);
|
|
kusano |
7d535a |
SUPERLU_FREE (amax);
|
|
kusano |
7d535a |
if ( swork2 ) SUPERLU_FREE (swork2);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
}
|