|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! @file scolumn_dfs.c
|
|
kusano |
7d535a |
* \brief Performs a symbolic factorization
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* -- SuperLU routine (version 3.0) --
|
|
kusano |
7d535a |
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
|
|
kusano |
7d535a |
* and Lawrence Berkeley National Lab.
|
|
kusano |
7d535a |
* October 15, 2003
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* Copyright (c) 1994 by Xerox Corporation. All rights reserved.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
|
|
kusano |
7d535a |
* EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* Permission is hereby granted to use or copy this program for any
|
|
kusano |
7d535a |
* purpose, provided the above notices are retained on all copies.
|
|
kusano |
7d535a |
* Permission to modify the code and to distribute modified code is
|
|
kusano |
7d535a |
* granted, provided the above notices are retained, and a notice that
|
|
kusano |
7d535a |
* the code was modified is included with the above copyright notice.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
#include "slu_sdefs.h"
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! \brief What type of supernodes we want */
|
|
kusano |
7d535a |
#define T2_SUPER
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! \brief
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* Purpose
|
|
kusano |
7d535a |
* =======
|
|
kusano |
7d535a |
* SCOLUMN_DFS performs a symbolic factorization on column jcol, and
|
|
kusano |
7d535a |
* decide the supernode boundary.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* This routine does not use numeric values, but only use the RHS
|
|
kusano |
7d535a |
* row indices to start the dfs.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* A supernode representative is the last column of a supernode.
|
|
kusano |
7d535a |
* The nonzeros in U[*,j] are segments that end at supernodal
|
|
kusano |
7d535a |
* representatives. The routine returns a list of such supernodal
|
|
kusano |
7d535a |
* representatives in topological order of the dfs that generates them.
|
|
kusano |
7d535a |
* The location of the first nonzero in each such supernodal segment
|
|
kusano |
7d535a |
* (supernodal entry location) is also returned.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* Local parameters
|
|
kusano |
7d535a |
* ================
|
|
kusano |
7d535a |
* nseg: no of segments in current U[*,j]
|
|
kusano |
7d535a |
* jsuper: jsuper=EMPTY if column j does not belong to the same
|
|
kusano |
7d535a |
* supernode as j-1. Otherwise, jsuper=nsuper.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* marker2: A-row --> A-row/col (0/1)
|
|
kusano |
7d535a |
* repfnz: SuperA-col --> PA-row
|
|
kusano |
7d535a |
* parent: SuperA-col --> SuperA-col
|
|
kusano |
7d535a |
* xplore: SuperA-col --> index to L-structure
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* Return value
|
|
kusano |
7d535a |
* ============
|
|
kusano |
7d535a |
* 0 success;
|
|
kusano |
7d535a |
* > 0 number of bytes allocated when run out of space.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
int
|
|
kusano |
7d535a |
scolumn_dfs(
|
|
kusano |
7d535a |
const int m, /* in - number of rows in the matrix */
|
|
kusano |
7d535a |
const int jcol, /* in */
|
|
kusano |
7d535a |
int *perm_r, /* in */
|
|
kusano |
7d535a |
int *nseg, /* modified - with new segments appended */
|
|
kusano |
7d535a |
int *lsub_col, /* in - defines the RHS vector to start the dfs */
|
|
kusano |
7d535a |
int *segrep, /* modified - with new segments appended */
|
|
kusano |
7d535a |
int *repfnz, /* modified */
|
|
kusano |
7d535a |
int *xprune, /* modified */
|
|
kusano |
7d535a |
int *marker, /* modified */
|
|
kusano |
7d535a |
int *parent, /* working array */
|
|
kusano |
7d535a |
int *xplore, /* working array */
|
|
kusano |
7d535a |
GlobalLU_t *Glu /* modified */
|
|
kusano |
7d535a |
)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
int jcolp1, jcolm1, jsuper, nsuper, nextl;
|
|
kusano |
7d535a |
int k, krep, krow, kmark, kperm;
|
|
kusano |
7d535a |
int *marker2; /* Used for small panel LU */
|
|
kusano |
7d535a |
int fsupc; /* First column of a snode */
|
|
kusano |
7d535a |
int myfnz; /* First nonz column of a U-segment */
|
|
kusano |
7d535a |
int chperm, chmark, chrep, kchild;
|
|
kusano |
7d535a |
int xdfs, maxdfs, kpar, oldrep;
|
|
kusano |
7d535a |
int jptr, jm1ptr;
|
|
kusano |
7d535a |
int ito, ifrom, istop; /* Used to compress row subscripts */
|
|
kusano |
7d535a |
int mem_error;
|
|
kusano |
7d535a |
int *xsup, *supno, *lsub, *xlsub;
|
|
kusano |
7d535a |
int nzlmax;
|
|
kusano |
7d535a |
static int first = 1, maxsuper;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
xsup = Glu->xsup;
|
|
kusano |
7d535a |
supno = Glu->supno;
|
|
kusano |
7d535a |
lsub = Glu->lsub;
|
|
kusano |
7d535a |
xlsub = Glu->xlsub;
|
|
kusano |
7d535a |
nzlmax = Glu->nzlmax;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( first ) {
|
|
kusano |
7d535a |
maxsuper = sp_ienv(3);
|
|
kusano |
7d535a |
first = 0;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
jcolp1 = jcol + 1;
|
|
kusano |
7d535a |
jcolm1 = jcol - 1;
|
|
kusano |
7d535a |
nsuper = supno[jcol];
|
|
kusano |
7d535a |
jsuper = nsuper;
|
|
kusano |
7d535a |
nextl = xlsub[jcol];
|
|
kusano |
7d535a |
marker2 = &marker[2*m];
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* For each nonzero in A[*,jcol] do dfs */
|
|
kusano |
7d535a |
for (k = 0; lsub_col[k] != EMPTY; k++) {
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
krow = lsub_col[k];
|
|
kusano |
7d535a |
lsub_col[k] = EMPTY;
|
|
kusano |
7d535a |
kmark = marker2[krow];
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* krow was visited before, go to the next nonz */
|
|
kusano |
7d535a |
if ( kmark == jcol ) continue;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* For each unmarked nbr krow of jcol
|
|
kusano |
7d535a |
* krow is in L: place it in structure of L[*,jcol]
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
marker2[krow] = jcol;
|
|
kusano |
7d535a |
kperm = perm_r[krow];
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( kperm == EMPTY ) {
|
|
kusano |
7d535a |
lsub[nextl++] = krow; /* krow is indexed into A */
|
|
kusano |
7d535a |
if ( nextl >= nzlmax ) {
|
|
kusano |
7d535a |
if ( mem_error = sLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu) )
|
|
kusano |
7d535a |
return (mem_error);
|
|
kusano |
7d535a |
lsub = Glu->lsub;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
if ( kmark != jcolm1 ) jsuper = EMPTY;/* Row index subset testing */
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
/* krow is in U: if its supernode-rep krep
|
|
kusano |
7d535a |
* has been explored, update repfnz[*]
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
krep = xsup[supno[kperm]+1] - 1;
|
|
kusano |
7d535a |
myfnz = repfnz[krep];
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( myfnz != EMPTY ) { /* Visited before */
|
|
kusano |
7d535a |
if ( myfnz > kperm ) repfnz[krep] = kperm;
|
|
kusano |
7d535a |
/* continue; */
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
else {
|
|
kusano |
7d535a |
/* Otherwise, perform dfs starting at krep */
|
|
kusano |
7d535a |
oldrep = EMPTY;
|
|
kusano |
7d535a |
parent[krep] = oldrep;
|
|
kusano |
7d535a |
repfnz[krep] = kperm;
|
|
kusano |
7d535a |
xdfs = xlsub[krep];
|
|
kusano |
7d535a |
maxdfs = xprune[krep];
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
do {
|
|
kusano |
7d535a |
/*
|
|
kusano |
7d535a |
* For each unmarked kchild of krep
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
while ( xdfs < maxdfs ) {
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
kchild = lsub[xdfs];
|
|
kusano |
7d535a |
xdfs++;
|
|
kusano |
7d535a |
chmark = marker2[kchild];
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( chmark != jcol ) { /* Not reached yet */
|
|
kusano |
7d535a |
marker2[kchild] = jcol;
|
|
kusano |
7d535a |
chperm = perm_r[kchild];
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Case kchild is in L: place it in L[*,k] */
|
|
kusano |
7d535a |
if ( chperm == EMPTY ) {
|
|
kusano |
7d535a |
lsub[nextl++] = kchild;
|
|
kusano |
7d535a |
if ( nextl >= nzlmax ) {
|
|
kusano |
7d535a |
if ( mem_error =
|
|
kusano |
7d535a |
sLUMemXpand(jcol,nextl,LSUB,&nzlmax,Glu) )
|
|
kusano |
7d535a |
return (mem_error);
|
|
kusano |
7d535a |
lsub = Glu->lsub;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
if ( chmark != jcolm1 ) jsuper = EMPTY;
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
/* Case kchild is in U:
|
|
kusano |
7d535a |
* chrep = its supernode-rep. If its rep has
|
|
kusano |
7d535a |
* been explored, update its repfnz[*]
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
chrep = xsup[supno[chperm]+1] - 1;
|
|
kusano |
7d535a |
myfnz = repfnz[chrep];
|
|
kusano |
7d535a |
if ( myfnz != EMPTY ) { /* Visited before */
|
|
kusano |
7d535a |
if ( myfnz > chperm )
|
|
kusano |
7d535a |
repfnz[chrep] = chperm;
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
/* Continue dfs at super-rep of kchild */
|
|
kusano |
7d535a |
xplore[krep] = xdfs;
|
|
kusano |
7d535a |
oldrep = krep;
|
|
kusano |
7d535a |
krep = chrep; /* Go deeper down G(L^t) */
|
|
kusano |
7d535a |
parent[krep] = oldrep;
|
|
kusano |
7d535a |
repfnz[krep] = chperm;
|
|
kusano |
7d535a |
xdfs = xlsub[krep];
|
|
kusano |
7d535a |
maxdfs = xprune[krep];
|
|
kusano |
7d535a |
} /* else */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
} /* else */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
} /* if */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
} /* while */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* krow has no more unexplored nbrs;
|
|
kusano |
7d535a |
* place supernode-rep krep in postorder DFS.
|
|
kusano |
7d535a |
* backtrack dfs to its parent
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
segrep[*nseg] = krep;
|
|
kusano |
7d535a |
++(*nseg);
|
|
kusano |
7d535a |
kpar = parent[krep]; /* Pop from stack, mimic recursion */
|
|
kusano |
7d535a |
if ( kpar == EMPTY ) break; /* dfs done */
|
|
kusano |
7d535a |
krep = kpar;
|
|
kusano |
7d535a |
xdfs = xplore[krep];
|
|
kusano |
7d535a |
maxdfs = xprune[krep];
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
} while ( kpar != EMPTY ); /* Until empty stack */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
} /* else */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
} /* else */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
} /* for each nonzero ... */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Check to see if j belongs in the same supernode as j-1 */
|
|
kusano |
7d535a |
if ( jcol == 0 ) { /* Do nothing for column 0 */
|
|
kusano |
7d535a |
nsuper = supno[0] = 0;
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
fsupc = xsup[nsuper];
|
|
kusano |
7d535a |
jptr = xlsub[jcol]; /* Not compressed yet */
|
|
kusano |
7d535a |
jm1ptr = xlsub[jcolm1];
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
#ifdef T2_SUPER
|
|
kusano |
7d535a |
if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = EMPTY;
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
/* Make sure the number of columns in a supernode doesn't
|
|
kusano |
7d535a |
exceed threshold. */
|
|
kusano |
7d535a |
if ( jcol - fsupc >= maxsuper ) jsuper = EMPTY;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* If jcol starts a new supernode, reclaim storage space in
|
|
kusano |
7d535a |
* lsub from the previous supernode. Note we only store
|
|
kusano |
7d535a |
* the subscript set of the first and last columns of
|
|
kusano |
7d535a |
* a supernode. (first for num values, last for pruning)
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
if ( jsuper == EMPTY ) { /* starts a new supernode */
|
|
kusano |
7d535a |
if ( (fsupc < jcolm1-1) ) { /* >= 3 columns in nsuper */
|
|
kusano |
7d535a |
#ifdef CHK_COMPRESS
|
|
kusano |
7d535a |
printf(" Compress lsub[] at super %d-%d\n", fsupc, jcolm1);
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
ito = xlsub[fsupc+1];
|
|
kusano |
7d535a |
xlsub[jcolm1] = ito;
|
|
kusano |
7d535a |
istop = ito + jptr - jm1ptr;
|
|
kusano |
7d535a |
xprune[jcolm1] = istop; /* Initialize xprune[jcol-1] */
|
|
kusano |
7d535a |
xlsub[jcol] = istop;
|
|
kusano |
7d535a |
for (ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito)
|
|
kusano |
7d535a |
lsub[ito] = lsub[ifrom];
|
|
kusano |
7d535a |
nextl = ito; /* = istop + length(jcol) */
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
nsuper++;
|
|
kusano |
7d535a |
supno[jcol] = nsuper;
|
|
kusano |
7d535a |
} /* if a new supernode */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
} /* else: jcol > 0 */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Tidy up the pointers before exit */
|
|
kusano |
7d535a |
xsup[nsuper+1] = jcolp1;
|
|
kusano |
7d535a |
supno[jcolp1] = nsuper;
|
|
kusano |
7d535a |
xprune[jcol] = nextl; /* Initialize upper bound for pruning */
|
|
kusano |
7d535a |
xlsub[jcolp1] = nextl;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
return 0;
|
|
kusano |
7d535a |
}
|