diff --git a/thirdparty/superlu/SuperLU_4.1/FORTRAN/c_fortran_dgssv.c.bak b/thirdparty/superlu/SuperLU_4.1/FORTRAN/c_fortran_dgssv.c.bak deleted file mode 100644 index 284abef..0000000 --- a/thirdparty/superlu/SuperLU_4.1/FORTRAN/c_fortran_dgssv.c.bak +++ /dev/null @@ -1,175 +0,0 @@ -/* - * -- SuperLU routine (version 3.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * October 15, 2003 - * - */ - -#include "dsp_defs.h" - -#define HANDLE_SIZE 8 -/* kind of integer to hold a pointer. Use int. - This might need to be changed on 64-bit systems. */ -typedef int fptr; /* 32-bit by default */ - -typedef struct { - SuperMatrix *L; - SuperMatrix *U; - int *perm_c; - int *perm_r; -} factors_t; - -void -c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values, - int *rowind, int *colptr, double *b, int *ldb, - fptr *f_factors, /* a handle containing the address - pointing to the factored matrices */ - int *info) - -{ -/* - * This routine can be called from Fortran. - * - * iopt (input) int - * Specifies the operation: - * = 1, performs LU decomposition for the first time - * = 2, performs triangular solve - * = 3, free all the storage in the end - * - * f_factors (input/output) fptr* - * If iopt == 1, it is an output and contains the pointer pointing to - * the structure of the factored matrices. - * Otherwise, it it an input. - * - */ - - SuperMatrix A, AC, B; - SuperMatrix *L, *U; - int *perm_r; /* row permutations from partial pivoting */ - int *perm_c; /* column permutation vector */ - int *etree; /* column elimination tree */ - SCformat *Lstore; - NCformat *Ustore; - int i, panel_size, permc_spec, relax; - trans_t trans; - double drop_tol = 0.0; - mem_usage_t mem_usage; - superlu_options_t options; - SuperLUStat_t stat; - factors_t *LUfactors; - - trans = NOTRANS; - - if ( *iopt == 1 ) { /* LU decomposition */ - - /* Set the default input options. */ - set_default_options(&options); - - /* Initialize the statistics variables. */ - StatInit(&stat); - - /* Adjust to 0-based indexing */ - for (i = 0; i < *nnz; ++i) --rowind[i]; - for (i = 0; i <= *n; ++i) --colptr[i]; - - dCreate_CompCol_Matrix(&A, *n, *n, *nnz, values, rowind, colptr, - SLU_NC, SLU_D, SLU_GE); - L = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); - U = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); - if ( !(perm_r = intMalloc(*n)) ) ABORT("Malloc fails for perm_r[]."); - if ( !(perm_c = intMalloc(*n)) ) ABORT("Malloc fails for perm_c[]."); - if ( !(etree = intMalloc(*n)) ) ABORT("Malloc fails for etree[]."); - - /* - * Get column permutation vector perm_c[], according to permc_spec: - * permc_spec = 0: natural ordering - * permc_spec = 1: minimum degree on structure of A'*A - * permc_spec = 2: minimum degree on structure of A'+A - * permc_spec = 3: approximate minimum degree for unsymmetric matrices - */ - permc_spec = options.ColPerm; - get_perm_c(permc_spec, &A, perm_c); - - sp_preorder(&options, &A, perm_c, etree, &AC); - - panel_size = sp_ienv(1); - relax = sp_ienv(2); - - dgstrf(&options, &AC, drop_tol, relax, panel_size, - etree, NULL, 0, perm_c, perm_r, L, U, &stat, info); - - if ( *info == 0 ) { - Lstore = (SCformat *) L->Store; - Ustore = (NCformat *) U->Store; - printf("No of nonzeros in factor L = %d\n", Lstore->nnz); - printf("No of nonzeros in factor U = %d\n", Ustore->nnz); - printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz); - dQuerySpace(L, U, &mem_usage); - printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", - mem_usage.for_lu/1e6, mem_usage.total_needed/1e6, - mem_usage.expansions); - } else { - printf("dgstrf() error returns INFO= %d\n", *info); - if ( *info <= *n ) { /* factorization completes */ - dQuerySpace(L, U, &mem_usage); - printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", - mem_usage.for_lu/1e6, mem_usage.total_needed/1e6, - mem_usage.expansions); - } - } - - /* Restore to 1-based indexing */ - for (i = 0; i < *nnz; ++i) ++rowind[i]; - for (i = 0; i <= *n; ++i) ++colptr[i]; - - /* Save the LU factors in the factors handle */ - LUfactors = (factors_t*) SUPERLU_MALLOC(sizeof(factors_t)); - LUfactors->L = L; - LUfactors->U = U; - LUfactors->perm_c = perm_c; - LUfactors->perm_r = perm_r; - *f_factors = (fptr) LUfactors; - - /* Free un-wanted storage */ - SUPERLU_FREE(etree); - Destroy_SuperMatrix_Store(&A); - Destroy_CompCol_Permuted(&AC); - StatFree(&stat); - - } else if ( *iopt == 2 ) { /* Triangular solve */ - /* Initialize the statistics variables. */ - StatInit(&stat); - - /* Extract the LU factors in the factors handle */ - LUfactors = (factors_t*) *f_factors; - L = LUfactors->L; - U = LUfactors->U; - perm_c = LUfactors->perm_c; - perm_r = LUfactors->perm_r; - - dCreate_Dense_Matrix(&B, *n, *nrhs, b, *ldb, SLU_DN, SLU_D, SLU_GE); - - /* Solve the system A*X=B, overwriting B with X. */ - dgstrs (trans, L, U, perm_c, perm_r, &B, &stat, info); - - Destroy_SuperMatrix_Store(&B); - StatFree(&stat); - - } else if ( *iopt == 3 ) { /* Free storage */ - /* Free the LU factors in the factors handle */ - LUfactors = (factors_t*) *f_factors; - SUPERLU_FREE (LUfactors->perm_r); - SUPERLU_FREE (LUfactors->perm_c); - Destroy_SuperNode_Matrix(LUfactors->L); - Destroy_CompCol_Matrix(LUfactors->U); - SUPERLU_FREE (LUfactors->L); - SUPERLU_FREE (LUfactors->U); - SUPERLU_FREE (LUfactors); - } else { - fprintf(stderr,"Invalid iopt=%d passed to c_fortran_dgssv()\n",*iopt); - exit(-1); - } -} - - diff --git a/thirdparty/superlu/SuperLU_4.1/SRC/cgsitrf.c.bak b/thirdparty/superlu/SuperLU_4.1/SRC/cgsitrf.c.bak deleted file mode 100644 index 03474b1..0000000 --- a/thirdparty/superlu/SuperLU_4.1/SRC/cgsitrf.c.bak +++ /dev/null @@ -1,629 +0,0 @@ - -/*! @file cgsitf.c - * \brief Computes an ILU factorization of a general sparse matrix - * - * <pre> - * -- SuperLU routine (version 4.0) -- - * Lawrence Berkeley National Laboratory. - * June 30, 2009 - * </pre> - */ - -#include "slu_cdefs.h" - -#ifdef DEBUG -int num_drop_L; -#endif - -/*! \brief - * - * <pre> - * Purpose - * ======= - * - * CGSITRF computes an ILU factorization of a general sparse m-by-n - * matrix A using partial pivoting with row interchanges. - * The factorization has the form - * Pr * A = L * U - * where Pr is a row permutation matrix, L is lower triangular with unit - * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper - * triangular (upper trapezoidal if A->nrow < A->ncol). - * - * See supermatrix.h for the definition of 'SuperMatrix' structure. - * - * Arguments - * ========= - * - * options (input) superlu_options_t* - * The structure defines the input parameters to control - * how the ILU decomposition will be performed. - * - * A (input) SuperMatrix* - * Original matrix A, permuted by columns, of dimension - * (A->nrow, A->ncol). The type of A can be: - * Stype = SLU_NCP; Dtype = SLU_C; Mtype = SLU_GE. - * - * relax (input) int - * To control degree of relaxing supernodes. If the number - * of nodes (columns) in a subtree of the elimination tree is less - * than relax, this subtree is considered as one supernode, - * regardless of the row structures of those columns. - * - * panel_size (input) int - * A panel consists of at most panel_size consecutive columns. - * - * etree (input) int*, dimension (A->ncol) - * Elimination tree of A'*A. - * Note: etree is a vector of parent pointers for a forest whose - * vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol. - * On input, the columns of A should be permuted so that the - * etree is in a certain postorder. - * - * work (input/output) void*, size (lwork) (in bytes) - * User-supplied work space and space for the output data structures. - * Not referenced if lwork = 0; - * - * lwork (input) int - * Specifies the size of work array in bytes. - * = 0: allocate space internally by system malloc; - * > 0: use user-supplied work array of length lwork in bytes, - * returns error if space runs out. - * = -1: the routine guesses the amount of space needed without - * performing the factorization, and returns it in - * *info; no other side effects. - * - * perm_c (input) int*, dimension (A->ncol) - * Column permutation vector, which defines the - * permutation matrix Pc; perm_c[i] = j means column i of A is - * in position j in A*Pc. - * When searching for diagonal, perm_c[*] is applied to the - * row subscripts of A, so that diagonal threshold pivoting - * can find the diagonal of A, rather than that of A*Pc. - * - * perm_r (input/output) int*, dimension (A->nrow) - * Row permutation vector which defines the permutation matrix Pr, - * perm_r[i] = j means row i of A is in position j in Pr*A. - * If options->Fact = SamePattern_SameRowPerm, the pivoting routine - * will try to use the input perm_r, unless a certain threshold - * criterion is violated. In that case, perm_r is overwritten by - * a new permutation determined by partial pivoting or diagonal - * threshold pivoting. - * Otherwise, perm_r is output argument; - * - * L (output) SuperMatrix* - * The factor L from the factorization Pr*A=L*U; use compressed row - * subscripts storage for supernodes, i.e., L has type: - * Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU. - * - * U (output) SuperMatrix* - * The factor U from the factorization Pr*A*Pc=L*U. Use column-wise - * storage scheme, i.e., U has types: Stype = SLU_NC, - * Dtype = SLU_C, Mtype = SLU_TRU. - * - * stat (output) SuperLUStat_t* - * Record the statistics on runtime and floating-point operation count. - * See slu_util.h for the definition of 'SuperLUStat_t'. - * - * info (output) int* - * = 0: successful exit - * < 0: if info = -i, the i-th argument had an illegal value - * > 0: if info = i, and i is - * <= A->ncol: number of zero pivots. They are replaced by small - * entries according to options->ILU_FillTol. - * > A->ncol: number of bytes allocated when memory allocation - * failure occurred, plus A->ncol. If lwork = -1, it is - * the estimated amount of space needed, plus A->ncol. - * - * ====================================================================== - * - * Local Working Arrays: - * ====================== - * m = number of rows in the matrix - * n = number of columns in the matrix - * - * marker[0:3*m-1]: marker[i] = j means that node i has been - * reached when working on column j. - * Storage: relative to original row subscripts - * NOTE: There are 4 of them: - * marker/marker1 are used for panel dfs, see (ilu_)dpanel_dfs.c; - * marker2 is used for inner-factorization, see (ilu)_dcolumn_dfs.c; - * marker_relax(has its own space) is used for relaxed supernodes. - * - * parent[0:m-1]: parent vector used during dfs - * Storage: relative to new row subscripts - * - * xplore[0:m-1]: xplore[i] gives the location of the next (dfs) - * unexplored neighbor of i in lsub[*] - * - * segrep[0:nseg-1]: contains the list of supernodal representatives - * in topological order of the dfs. A supernode representative is the - * last column of a supernode. - * The maximum size of segrep[] is n. - * - * repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a - * supernodal representative r, repfnz[r] is the location of the first - * nonzero in this segment. It is also used during the dfs: repfnz[r]>0 - * indicates the supernode r has been explored. - * NOTE: There are W of them, each used for one column of a panel. - * - * panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below - * the panel diagonal. These are filled in during dpanel_dfs(), and are - * used later in the inner LU factorization within the panel. - * panel_lsub[]/dense[] pair forms the SPA data structure. - * NOTE: There are W of them. - * - * dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values; - * NOTE: there are W of them. - * - * tempv[0:*]: real temporary used for dense numeric kernels; - * The size of this array is defined by NUM_TEMPV() in slu_util.h. - * It is also used by the dropping routine ilu_ddrop_row(). - * </pre> - */ - -void -cgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size, - int *etree, void *work, int lwork, int *perm_c, int *perm_r, - SuperMatrix *L, SuperMatrix *U, SuperLUStat_t *stat, int *info) -{ - /* Local working arrays */ - NCPformat *Astore; - int *iperm_r = NULL; /* inverse of perm_r; used when - options->Fact == SamePattern_SameRowPerm */ - int *iperm_c; /* inverse of perm_c */ - int *swap, *iswap; /* swap is used to store the row permutation - during the factorization. Initially, it is set - to iperm_c (row indeces of Pc*A*Pc'). - iswap is the inverse of swap. After the - factorization, it is equal to perm_r. */ - int *iwork; - complex *cwork; - int *segrep, *repfnz, *parent, *xplore; - int *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */ - int *marker, *marker_relax; - complex *dense, *tempv; - float *stempv; - int *relax_end, *relax_fsupc; - complex *a; - int *asub; - int *xa_begin, *xa_end; - int *xsup, *supno; - int *xlsub, *xlusup, *xusub; - int nzlumax; - float *amax; - complex drop_sum; - static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */ - int *iwork2; /* used by the second dropping rule */ - - /* Local scalars */ - fact_t fact = options->Fact; - double diag_pivot_thresh = options->DiagPivotThresh; - double drop_tol = options->ILU_DropTol; /* tau */ - double fill_ini = options->ILU_FillTol; /* tau^hat */ - double gamma = options->ILU_FillFactor; - int drop_rule = options->ILU_DropRule; - milu_t milu = options->ILU_MILU; - double fill_tol; - int pivrow; /* pivotal row number in the original matrix A */ - int nseg1; /* no of segments in U-column above panel row jcol */ - int nseg; /* no of segments in each U-column */ - register int jcol; - register int kcol; /* end column of a relaxed snode */ - register int icol; - register int i, k, jj, new_next, iinfo; - int m, n, min_mn, jsupno, fsupc, nextlu, nextu; - int w_def; /* upper bound on panel width */ - int usepr, iperm_r_allocated = 0; - int nnzL, nnzU; - int *panel_histo = stat->panel_histo; - flops_t *ops = stat->ops; - - int last_drop;/* the last column which the dropping rules applied */ - int quota; - int nnzAj; /* number of nonzeros in A(:,1:j) */ - int nnzLj, nnzUj; - double tol_L = drop_tol, tol_U = drop_tol; - complex zero = {0.0, 0.0}; - - /* Executable */ - iinfo = 0; - m = A->nrow; - n = A->ncol; - min_mn = SUPERLU_MIN(m, n); - Astore = A->Store; - a = Astore->nzval; - asub = Astore->rowind; - xa_begin = Astore->colbeg; - xa_end = Astore->colend; - - /* Allocate storage common to the factor routines */ - *info = cLUMemInit(fact, work, lwork, m, n, Astore->nnz, panel_size, - gamma, L, U, &Glu, &iwork, &cwork); - if ( *info ) return; - - xsup = Glu.xsup; - supno = Glu.supno; - xlsub = Glu.xlsub; - xlusup = Glu.xlusup; - xusub = Glu.xusub; - - SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore, - &repfnz, &panel_lsub, &marker_relax, &marker); - cSetRWork(m, panel_size, cwork, &dense, &tempv); - - usepr = (fact == SamePattern_SameRowPerm); - if ( usepr ) { - /* Compute the inverse of perm_r */ - iperm_r = (int *) intMalloc(m); - for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k; - iperm_r_allocated = 1; - } - - iperm_c = (int *) intMalloc(n); - for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k; - swap = (int *)intMalloc(n); - for (k = 0; k < n; k++) swap[k] = iperm_c[k]; - iswap = (int *)intMalloc(n); - for (k = 0; k < n; k++) iswap[k] = perm_c[k]; - amax = (float *) floatMalloc(panel_size); - if (drop_rule & DROP_SECONDARY) - iwork2 = (int *)intMalloc(n); - else - iwork2 = NULL; - - nnzAj = 0; - nnzLj = 0; - nnzUj = 0; - last_drop = SUPERLU_MAX(min_mn - 2 * sp_ienv(7), (int)(min_mn * 0.95)); - - /* Identify relaxed snodes */ - relax_end = (int *) intMalloc(n); - relax_fsupc = (int *) intMalloc(n); - if ( options->SymmetricMode == YES ) - ilu_heap_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc); - else - ilu_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc); - - ifill (perm_r, m, EMPTY); - ifill (marker, m * NO_MARKER, EMPTY); - supno[0] = -1; - xsup[0] = xlsub[0] = xusub[0] = xlusup[0] = 0; - w_def = panel_size; - - /* Mark the rows used by relaxed supernodes */ - ifill (marker_relax, m, EMPTY); - i = mark_relax(m, relax_end, relax_fsupc, xa_begin, xa_end, - asub, marker_relax); -#if ( PRNTlevel >= 1) - printf("%d relaxed supernodes.\n", i); -#endif - - /* - * Work on one "panel" at a time. A panel is one of the following: - * (a) a relaxed supernode at the bottom of the etree, or - * (b) panel_size contiguous columns, defined by the user - */ - for (jcol = 0; jcol < min_mn; ) { - - if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */ - kcol = relax_end[jcol]; /* end of the relaxed snode */ - panel_histo[kcol-jcol+1]++; - - /* Drop small rows in the previous supernode. */ - if (jcol > 0 && jcol < last_drop) { - int first = xsup[supno[jcol - 1]]; - int last = jcol - 1; - int quota; - - /* Compute the quota */ - if (drop_rule & DROP_PROWS) - quota = gamma * Astore->nnz / m * (m - first) / m - * (last - first + 1); - else if (drop_rule & DROP_COLUMN) { - int i; - quota = 0; - for (i = first; i <= last; i++) - quota += xa_end[i] - xa_begin[i]; - quota = gamma * quota * (m - first) / m; - } else if (drop_rule & DROP_AREA) - quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m) - - nnzLj; - else - quota = m * n; - fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / min_mn); - - /* Drop small rows */ - stempv = (float *) tempv; - i = ilu_cdrop_row(options, first, last, tol_L, quota, &nnzLj, - &fill_tol, &Glu, stempv, iwork2, 0); - /* Reset the parameters */ - if (drop_rule & DROP_DYNAMIC) { - if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m) - < nnzLj) - tol_L = SUPERLU_MIN(1.0, tol_L * 2.0); - else - tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5); - } - if (fill_tol < 0) iinfo -= (int)fill_tol; -#ifdef DEBUG - num_drop_L += i * (last - first + 1); -#endif - } - - /* -------------------------------------- - * Factorize the relaxed supernode(jcol:kcol) - * -------------------------------------- */ - /* Determine the union of the row structure of the snode */ - if ( (*info = ilu_csnode_dfs(jcol, kcol, asub, xa_begin, xa_end, - marker, &Glu)) != 0 ) - return; - - nextu = xusub[jcol]; - nextlu = xlusup[jcol]; - jsupno = supno[jcol]; - fsupc = xsup[jsupno]; - new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1); - nzlumax = Glu.nzlumax; - while ( new_next > nzlumax ) { - if ((*info = cLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu))) - return; - } - - for (icol = jcol; icol <= kcol; icol++) { - xusub[icol+1] = nextu; - - amax[0] = 0.0; - /* Scatter into SPA dense[*] */ - for (k = xa_begin[icol]; k < xa_end[icol]; k++) { - register float tmp = c_abs1 (&a[k]); - if (tmp > amax[0]) amax[0] = tmp; - dense[asub[k]] = a[k]; - } - nnzAj += xa_end[icol] - xa_begin[icol]; - if (amax[0] == 0.0) { - amax[0] = fill_ini; -#if ( PRNTlevel >= 1) - printf("Column %d is entirely zero!\n", icol); - fflush(stdout); -#endif - } - - /* Numeric update within the snode */ - csnode_bmod(icol, jsupno, fsupc, dense, tempv, &Glu, stat); - - if (usepr) pivrow = iperm_r[icol]; - fill_tol = pow(fill_ini, 1.0 - (double)icol / (double)min_mn); - if ( (*info = ilu_cpivotL(icol, diag_pivot_thresh, &usepr, - perm_r, iperm_c[icol], swap, iswap, - marker_relax, &pivrow, - amax[0] * fill_tol, milu, zero, - &Glu, stat)) ) { - iinfo++; - marker[pivrow] = kcol; - } - - } - - jcol = kcol + 1; - - } else { /* Work on one panel of panel_size columns */ - - /* Adjust panel_size so that a panel won't overlap with the next - * relaxed snode. - */ - panel_size = w_def; - for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++) - if ( relax_end[k] != EMPTY ) { - panel_size = k - jcol; - break; - } - if ( k == min_mn ) panel_size = min_mn - jcol; - panel_histo[panel_size]++; - - /* symbolic factor on a panel of columns */ - ilu_cpanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1, - dense, amax, panel_lsub, segrep, repfnz, - marker, parent, xplore, &Glu); - - /* numeric sup-panel updates in topological order */ - cpanel_bmod(m, panel_size, jcol, nseg1, dense, - tempv, segrep, repfnz, &Glu, stat); - - /* Sparse LU within the panel, and below panel diagonal */ - for (jj = jcol; jj < jcol + panel_size; jj++) { - - k = (jj - jcol) * m; /* column index for w-wide arrays */ - - nseg = nseg1; /* Begin after all the panel segments */ - - nnzAj += xa_end[jj] - xa_begin[jj]; - - if ((*info = ilu_ccolumn_dfs(m, jj, perm_r, &nseg, - &panel_lsub[k], segrep, &repfnz[k], - marker, parent, xplore, &Glu))) - return; - - /* Numeric updates */ - if ((*info = ccolumn_bmod(jj, (nseg - nseg1), &dense[k], - tempv, &segrep[nseg1], &repfnz[k], - jcol, &Glu, stat)) != 0) return; - - /* Make a fill-in position if the column is entirely zero */ - if (xlsub[jj + 1] == xlsub[jj]) { - register int i, row; - int nextl; - int nzlmax = Glu.nzlmax; - int *lsub = Glu.lsub; - int *marker2 = marker + 2 * m; - - /* Allocate memory */ - nextl = xlsub[jj] + 1; - if (nextl >= nzlmax) { - int error = cLUMemXpand(jj, nextl, LSUB, &nzlmax, &Glu); - if (error) { *info = error; return; } - lsub = Glu.lsub; - } - xlsub[jj + 1]++; - assert(xlusup[jj]==xlusup[jj+1]); - xlusup[jj + 1]++; - Glu.lusup[xlusup[jj]] = zero; - - /* Choose a row index (pivrow) for fill-in */ - for (i = jj; i < n; i++) - if (marker_relax[swap[i]] <= jj) break; - row = swap[i]; - marker2[row] = jj; - lsub[xlsub[jj]] = row; -#ifdef DEBUG - printf("Fill col %d.\n", jj); - fflush(stdout); -#endif - } - - /* Computer the quota */ - if (drop_rule & DROP_PROWS) - quota = gamma * Astore->nnz / m * jj / m; - else if (drop_rule & DROP_COLUMN) - quota = gamma * (xa_end[jj] - xa_begin[jj]) * - (jj + 1) / m; - else if (drop_rule & DROP_AREA) - quota = gamma * 0.9 * nnzAj * 0.5 - nnzUj; - else - quota = m; - - /* Copy the U-segments to ucol[*] and drop small entries */ - if ((*info = ilu_ccopy_to_ucol(jj, nseg, segrep, &repfnz[k], - perm_r, &dense[k], drop_rule, - milu, amax[jj - jcol] * tol_U, - quota, &drop_sum, &nnzUj, &Glu, - iwork2)) != 0) - return; - - /* Reset the dropping threshold if required */ - if (drop_rule & DROP_DYNAMIC) { - if (gamma * 0.9 * nnzAj * 0.5 < nnzLj) - tol_U = SUPERLU_MIN(1.0, tol_U * 2.0); - else - tol_U = SUPERLU_MAX(drop_tol, tol_U * 0.5); - } - - cs_mult(&drop_sum, &drop_sum, MILU_ALPHA); - if (usepr) pivrow = iperm_r[jj]; - fill_tol = pow(fill_ini, 1.0 - (double)jj / (double)min_mn); - if ( (*info = ilu_cpivotL(jj, diag_pivot_thresh, &usepr, perm_r, - iperm_c[jj], swap, iswap, - marker_relax, &pivrow, - amax[jj - jcol] * fill_tol, milu, - drop_sum, &Glu, stat)) ) { - iinfo++; - marker[m + pivrow] = jj; - marker[2 * m + pivrow] = jj; - } - - /* Reset repfnz[] for this column */ - resetrep_col (nseg, segrep, &repfnz[k]); - - /* Start a new supernode, drop the previous one */ - if (jj > 0 && supno[jj] > supno[jj - 1] && jj < last_drop) { - int first = xsup[supno[jj - 1]]; - int last = jj - 1; - int quota; - - /* Compute the quota */ - if (drop_rule & DROP_PROWS) - quota = gamma * Astore->nnz / m * (m - first) / m - * (last - first + 1); - else if (drop_rule & DROP_COLUMN) { - int i; - quota = 0; - for (i = first; i <= last; i++) - quota += xa_end[i] - xa_begin[i]; - quota = gamma * quota * (m - first) / m; - } else if (drop_rule & DROP_AREA) - quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) - / m) - nnzLj; - else - quota = m * n; - fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / - (double)min_mn); - - /* Drop small rows */ - stempv = (float *) tempv; - i = ilu_cdrop_row(options, first, last, tol_L, quota, - &nnzLj, &fill_tol, &Glu, stempv, iwork2, - 1); - - /* Reset the parameters */ - if (drop_rule & DROP_DYNAMIC) { - if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m) - < nnzLj) - tol_L = SUPERLU_MIN(1.0, tol_L * 2.0); - else - tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5); - } - if (fill_tol < 0) iinfo -= (int)fill_tol; -#ifdef DEBUG - num_drop_L += i * (last - first + 1); -#endif - } /* if start a new supernode */ - - } /* for */ - - jcol += panel_size; /* Move to the next panel */ - - } /* else */ - - } /* for */ - - *info = iinfo; - - if ( m > n ) { - k = 0; - for (i = 0; i < m; ++i) - if ( perm_r[i] == EMPTY ) { - perm_r[i] = n + k; - ++k; - } - } - - ilu_countnz(min_mn, &nnzL, &nnzU, &Glu); - fixupL(min_mn, perm_r, &Glu); - - cLUWorkFree(iwork, cwork, &Glu); /* Free work space and compress storage */ - - if ( fact == SamePattern_SameRowPerm ) { - /* L and U structures may have changed due to possibly different - pivoting, even though the storage is available. - There could also be memory expansions, so the array locations - may have changed, */ - ((SCformat *)L->Store)->nnz = nnzL; - ((SCformat *)L->Store)->nsuper = Glu.supno[n]; - ((SCformat *)L->Store)->nzval = Glu.lusup; - ((SCformat *)L->Store)->nzval_colptr = Glu.xlusup; - ((SCformat *)L->Store)->rowind = Glu.lsub; - ((SCformat *)L->Store)->rowind_colptr = Glu.xlsub; - ((NCformat *)U->Store)->nnz = nnzU; - ((NCformat *)U->Store)->nzval = Glu.ucol; - ((NCformat *)U->Store)->rowind = Glu.usub; - ((NCformat *)U->Store)->colptr = Glu.xusub; - } else { - cCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup, - Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno, - Glu.xsup, SLU_SC, SLU_C, SLU_TRLU); - cCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol, - Glu.usub, Glu.xusub, SLU_NC, SLU_C, SLU_TRU); - } - - ops[FACT] += ops[TRSV] + ops[GEMV]; - stat->expansions = --(Glu.num_expansions); - - if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r); - SUPERLU_FREE (iperm_c); - SUPERLU_FREE (relax_end); - SUPERLU_FREE (swap); - SUPERLU_FREE (iswap); - SUPERLU_FREE (relax_fsupc); - SUPERLU_FREE (amax); - if ( iwork2 ) SUPERLU_FREE (iwork2); - -} diff --git a/thirdparty/superlu/SuperLU_4.1/SRC/cgssvx.c.bak b/thirdparty/superlu/SuperLU_4.1/SRC/cgssvx.c.bak deleted file mode 100644 index 428ecb0..0000000 --- a/thirdparty/superlu/SuperLU_4.1/SRC/cgssvx.c.bak +++ /dev/null @@ -1,619 +0,0 @@ - -/*! @file cgssvx.c - * \brief Solves the system of linear equations A*X=B or A'*X=B - * - * <pre> - * -- SuperLU routine (version 3.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * October 15, 2003 - * </pre> - */ -#include "slu_cdefs.h" - -/*! \brief - * - * <pre> - * Purpose - * ======= - * - * CGSSVX solves the system of linear equations A*X=B or A'*X=B, using - * the LU factorization from cgstrf(). Error bounds on the solution and - * a condition estimate are also provided. It performs the following steps: - * - * 1. If A is stored column-wise (A->Stype = SLU_NC): - * - * 1.1. If options->Equil = YES, scaling factors are computed to - * equilibrate the system: - * options->Trans = NOTRANS: - * diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B - * options->Trans = TRANS: - * (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B - * options->Trans = CONJ: - * (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B - * Whether or not the system will be equilibrated depends on the - * scaling of the matrix A, but if equilibration is used, A is - * overwritten by diag(R)*A*diag(C) and B by diag(R)*B - * (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans - * = TRANS or CONJ). - * - * 1.2. Permute columns of A, forming A*Pc, where Pc is a permutation - * matrix that usually preserves sparsity. - * For more details of this step, see sp_preorder.c. - * - * 1.3. If options->Fact != FACTORED, the LU decomposition is used to - * factor the matrix A (after equilibration if options->Equil = YES) - * as Pr*A*Pc = L*U, with Pr determined by partial pivoting. - * - * 1.4. Compute the reciprocal pivot growth factor. - * - * 1.5. If some U(i,i) = 0, so that U is exactly singular, then the - * routine returns with info = i. Otherwise, the factored form of - * A is used to estimate the condition number of the matrix A. If - * the reciprocal of the condition number is less than machine - * precision, info = A->ncol+1 is returned as a warning, but the - * routine still goes on to solve for X and computes error bounds - * as described below. - * - * 1.6. The system of equations is solved for X using the factored form - * of A. - * - * 1.7. If options->IterRefine != NOREFINE, iterative refinement is - * applied to improve the computed solution matrix and calculate - * error bounds and backward error estimates for it. - * - * 1.8. If equilibration was used, the matrix X is premultiplied by - * diag(C) (if options->Trans = NOTRANS) or diag(R) - * (if options->Trans = TRANS or CONJ) so that it solves the - * original system before equilibration. - * - * 2. If A is stored row-wise (A->Stype = SLU_NR), apply the above algorithm - * to the transpose of A: - * - * 2.1. If options->Equil = YES, scaling factors are computed to - * equilibrate the system: - * options->Trans = NOTRANS: - * diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B - * options->Trans = TRANS: - * (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B - * options->Trans = CONJ: - * (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B - * Whether or not the system will be equilibrated depends on the - * scaling of the matrix A, but if equilibration is used, A' is - * overwritten by diag(R)*A'*diag(C) and B by diag(R)*B - * (if trans='N') or diag(C)*B (if trans = 'T' or 'C'). - * - * 2.2. Permute columns of transpose(A) (rows of A), - * forming transpose(A)*Pc, where Pc is a permutation matrix that - * usually preserves sparsity. - * For more details of this step, see sp_preorder.c. - * - * 2.3. If options->Fact != FACTORED, the LU decomposition is used to - * factor the transpose(A) (after equilibration if - * options->Fact = YES) as Pr*transpose(A)*Pc = L*U with the - * permutation Pr determined by partial pivoting. - * - * 2.4. Compute the reciprocal pivot growth factor. - * - * 2.5. If some U(i,i) = 0, so that U is exactly singular, then the - * routine returns with info = i. Otherwise, the factored form - * of transpose(A) is used to estimate the condition number of the - * matrix A. If the reciprocal of the condition number - * is less than machine precision, info = A->nrow+1 is returned as - * a warning, but the routine still goes on to solve for X and - * computes error bounds as described below. - * - * 2.6. The system of equations is solved for X using the factored form - * of transpose(A). - * - * 2.7. If options->IterRefine != NOREFINE, iterative refinement is - * applied to improve the computed solution matrix and calculate - * error bounds and backward error estimates for it. - * - * 2.8. If equilibration was used, the matrix X is premultiplied by - * diag(C) (if options->Trans = NOTRANS) or diag(R) - * (if options->Trans = TRANS or CONJ) so that it solves the - * original system before equilibration. - * - * See supermatrix.h for the definition of 'SuperMatrix' structure. - * - * Arguments - * ========= - * - * options (input) superlu_options_t* - * The structure defines the input parameters to control - * how the LU decomposition will be performed and how the - * system will be solved. - * - * A (input/output) SuperMatrix* - * Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number - * of the linear equations is A->nrow. Currently, the type of A can be: - * Stype = SLU_NC or SLU_NR, Dtype = SLU_D, Mtype = SLU_GE. - * In the future, more general A may be handled. - * - * On entry, If options->Fact = FACTORED and equed is not 'N', - * then A must have been equilibrated by the scaling factors in - * R and/or C. - * On exit, A is not modified if options->Equil = NO, or if - * options->Equil = YES but equed = 'N' on exit. - * Otherwise, if options->Equil = YES and equed is not 'N', - * A is scaled as follows: - * If A->Stype = SLU_NC: - * equed = 'R': A := diag(R) * A - * equed = 'C': A := A * diag(C) - * equed = 'B': A := diag(R) * A * diag(C). - * If A->Stype = SLU_NR: - * equed = 'R': transpose(A) := diag(R) * transpose(A) - * equed = 'C': transpose(A) := transpose(A) * diag(C) - * equed = 'B': transpose(A) := diag(R) * transpose(A) * diag(C). - * - * perm_c (input/output) int* - * If A->Stype = SLU_NC, Column permutation vector of size A->ncol, - * which defines the permutation matrix Pc; perm_c[i] = j means - * column i of A is in position j in A*Pc. - * On exit, perm_c may be overwritten by the product of the input - * perm_c and a permutation that postorders the elimination tree - * of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree - * is already in postorder. - * - * If A->Stype = SLU_NR, column permutation vector of size A->nrow, - * which describes permutation of columns of transpose(A) - * (rows of A) as described above. - * - * perm_r (input/output) int* - * If A->Stype = SLU_NC, row permutation vector of size A->nrow, - * which defines the permutation matrix Pr, and is determined - * by partial pivoting. perm_r[i] = j means row i of A is in - * position j in Pr*A. - * - * If A->Stype = SLU_NR, permutation vector of size A->ncol, which - * determines permutation of rows of transpose(A) - * (columns of A) as described above. - * - * If options->Fact = SamePattern_SameRowPerm, the pivoting routine - * will try to use the input perm_r, unless a certain threshold - * criterion is violated. In that case, perm_r is overwritten by a - * new permutation determined by partial pivoting or diagonal - * threshold pivoting. - * Otherwise, perm_r is output argument. - * - * etree (input/output) int*, dimension (A->ncol) - * Elimination tree of Pc'*A'*A*Pc. - * If options->Fact != FACTORED and options->Fact != DOFACT, - * etree is an input argument, otherwise it is an output argument. - * Note: etree is a vector of parent pointers for a forest whose - * vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol. - * - * equed (input/output) char* - * Specifies the form of equilibration that was done. - * = 'N': No equilibration. - * = 'R': Row equilibration, i.e., A was premultiplied by diag(R). - * = 'C': Column equilibration, i.e., A was postmultiplied by diag(C). - * = 'B': Both row and column equilibration, i.e., A was replaced - * by diag(R)*A*diag(C). - * If options->Fact = FACTORED, equed is an input argument, - * otherwise it is an output argument. - * - * R (input/output) float*, dimension (A->nrow) - * The row scale factors for A or transpose(A). - * If equed = 'R' or 'B', A (if A->Stype = SLU_NC) or transpose(A) - * (if A->Stype = SLU_NR) is multiplied on the left by diag(R). - * If equed = 'N' or 'C', R is not accessed. - * If options->Fact = FACTORED, R is an input argument, - * otherwise, R is output. - * If options->zFact = FACTORED and equed = 'R' or 'B', each element - * of R must be positive. - * - * C (input/output) float*, dimension (A->ncol) - * The column scale factors for A or transpose(A). - * If equed = 'C' or 'B', A (if A->Stype = SLU_NC) or transpose(A) - * (if A->Stype = SLU_NR) is multiplied on the right by diag(C). - * If equed = 'N' or 'R', C is not accessed. - * If options->Fact = FACTORED, C is an input argument, - * otherwise, C is output. - * If options->Fact = FACTORED and equed = 'C' or 'B', each element - * of C must be positive. - * - * L (output) SuperMatrix* - * The factor L from the factorization - * Pr*A*Pc=L*U (if A->Stype SLU_= NC) or - * Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR). - * Uses compressed row subscripts storage for supernodes, i.e., - * L has types: Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU. - * - * U (output) SuperMatrix* - * The factor U from the factorization - * Pr*A*Pc=L*U (if A->Stype = SLU_NC) or - * Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR). - * Uses column-wise storage scheme, i.e., U has types: - * Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_TRU. - * - * work (workspace/output) void*, size (lwork) (in bytes) - * User supplied workspace, should be large enough - * to hold data structures for factors L and U. - * On exit, if fact is not 'F', L and U point to this array. - * - * lwork (input) int - * Specifies the size of work array in bytes. - * = 0: allocate space internally by system malloc; - * > 0: use user-supplied work array of length lwork in bytes, - * returns error if space runs out. - * = -1: the routine guesses the amount of space needed without - * performing the factorization, and returns it in - * mem_usage->total_needed; no other side effects. - * - * See argument 'mem_usage' for memory usage statistics. - * - * B (input/output) SuperMatrix* - * B has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE. - * On entry, the right hand side matrix. - * If B->ncol = 0, only LU decomposition is performed, the triangular - * solve is skipped. - * On exit, - * if equed = 'N', B is not modified; otherwise - * if A->Stype = SLU_NC: - * if options->Trans = NOTRANS and equed = 'R' or 'B', - * B is overwritten by diag(R)*B; - * if options->Trans = TRANS or CONJ and equed = 'C' of 'B', - * B is overwritten by diag(C)*B; - * if A->Stype = SLU_NR: - * if options->Trans = NOTRANS and equed = 'C' or 'B', - * B is overwritten by diag(C)*B; - * if options->Trans = TRANS or CONJ and equed = 'R' of 'B', - * B is overwritten by diag(R)*B. - * - * X (output) SuperMatrix* - * X has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE. - * If info = 0 or info = A->ncol+1, X contains the solution matrix - * to the original system of equations. Note that A and B are modified - * on exit if equed is not 'N', and the solution to the equilibrated - * system is inv(diag(C))*X if options->Trans = NOTRANS and - * equed = 'C' or 'B', or inv(diag(R))*X if options->Trans = 'T' or 'C' - * and equed = 'R' or 'B'. - * - * recip_pivot_growth (output) float* - * The reciprocal pivot growth factor max_j( norm(A_j)/norm(U_j) ). - * The infinity norm is used. If recip_pivot_growth is much less - * than 1, the stability of the LU factorization could be poor. - * - * rcond (output) float* - * The estimate of the reciprocal condition number of the matrix A - * after equilibration (if done). If rcond is less than the machine - * precision (in particular, if rcond = 0), the matrix is singular - * to working precision. This condition is indicated by a return - * code of info > 0. - * - * FERR (output) float*, dimension (B->ncol) - * The estimated forward error bound for each solution vector - * X(j) (the j-th column of the solution matrix X). - * If XTRUE is the true solution corresponding to X(j), FERR(j) - * is an estimated upper bound for the magnitude of the largest - * element in (X(j) - XTRUE) divided by the magnitude of the - * largest element in X(j). The estimate is as reliable as - * the estimate for RCOND, and is almost always a slight - * overestimate of the true error. - * If options->IterRefine = NOREFINE, ferr = 1.0. - * - * BERR (output) float*, dimension (B->ncol) - * The componentwise relative backward error of each solution - * vector X(j) (i.e., the smallest relative change in - * any element of A or B that makes X(j) an exact solution). - * If options->IterRefine = NOREFINE, berr = 1.0. - * - * mem_usage (output) mem_usage_t* - * Record the memory usage statistics, consisting of following fields: - * - for_lu (float) - * The amount of space used in bytes for L\U data structures. - * - total_needed (float) - * The amount of space needed in bytes to perform factorization. - * - expansions (int) - * The number of memory expansions during the LU factorization. - * - * stat (output) SuperLUStat_t* - * Record the statistics on runtime and floating-point operation count. - * See slu_util.h for the definition of 'SuperLUStat_t'. - * - * info (output) int* - * = 0: successful exit - * < 0: if info = -i, the i-th argument had an illegal value - * > 0: if info = i, and i is - * <= A->ncol: U(i,i) is exactly zero. The factorization has - * been completed, but the factor U is exactly - * singular, so the solution and error bounds - * could not be computed. - * = A->ncol+1: U is nonsingular, but RCOND is less than machine - * precision, meaning that the matrix is singular to - * working precision. Nevertheless, the solution and - * error bounds are computed because there are a number - * of situations where the computed solution can be more - * accurate than the value of RCOND would suggest. - * > A->ncol+1: number of bytes allocated when memory allocation - * failure occurred, plus A->ncol. - * </pre> - */ - -void -cgssvx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r, - int *etree, char *equed, float *R, float *C, - SuperMatrix *L, SuperMatrix *U, void *work, int lwork, - SuperMatrix *B, SuperMatrix *X, float *recip_pivot_growth, - float *rcond, float *ferr, float *berr, - mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info ) -{ - - - DNformat *Bstore, *Xstore; - complex *Bmat, *Xmat; - int ldb, ldx, nrhs; - SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/ - SuperMatrix AC; /* Matrix postmultiplied by Pc */ - int colequ, equil, nofact, notran, rowequ, permc_spec; - trans_t trant; - char norm[1]; - int i, j, info1; - float amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin; - int relax, panel_size; - float diag_pivot_thresh; - double t0; /* temporary time */ - double *utime; - - /* External functions */ - extern float clangs(char *, SuperMatrix *); - - Bstore = B->Store; - Xstore = X->Store; - Bmat = Bstore->nzval; - Xmat = Xstore->nzval; - ldb = Bstore->lda; - ldx = Xstore->lda; - nrhs = B->ncol; - - *info = 0; - nofact = (options->Fact != FACTORED); - equil = (options->Equil == YES); - notran = (options->Trans == NOTRANS); - if ( nofact ) { - *(unsigned char *)equed = 'N'; - rowequ = FALSE; - colequ = FALSE; - } else { - rowequ = lsame_(equed, "R") || lsame_(equed, "B"); - colequ = lsame_(equed, "C") || lsame_(equed, "B"); - smlnum = slamch_("Safe minimum"); - bignum = 1. / smlnum; - } - -#if 0 -printf("dgssvx: Fact=%4d, Trans=%4d, equed=%c\n", - options->Fact, options->Trans, *equed); -#endif - - /* Test the input parameters */ - if (!nofact && options->Fact != DOFACT && options->Fact != SamePattern && - options->Fact != SamePattern_SameRowPerm && - !notran && options->Trans != TRANS && options->Trans != CONJ && - !equil && options->Equil != NO) - *info = -1; - else if ( A->nrow != A->ncol || A->nrow < 0 || - (A->Stype != SLU_NC && A->Stype != SLU_NR) || - A->Dtype != SLU_C || A->Mtype != SLU_GE ) - *info = -2; - else if (options->Fact == FACTORED && - !(rowequ || colequ || lsame_(equed, "N"))) - *info = -6; - else { - if (rowequ) { - rcmin = bignum; - rcmax = 0.; - for (j = 0; j < A->nrow; ++j) { - rcmin = SUPERLU_MIN(rcmin, R[j]); - rcmax = SUPERLU_MAX(rcmax, R[j]); - } - if (rcmin <= 0.) *info = -7; - else if ( A->nrow > 0) - rowcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); - else rowcnd = 1.; - } - if (colequ && *info == 0) { - rcmin = bignum; - rcmax = 0.; - for (j = 0; j < A->nrow; ++j) { - rcmin = SUPERLU_MIN(rcmin, C[j]); - rcmax = SUPERLU_MAX(rcmax, C[j]); - } - if (rcmin <= 0.) *info = -8; - else if (A->nrow > 0) - colcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); - else colcnd = 1.; - } - if (*info == 0) { - if ( lwork < -1 ) *info = -12; - else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) || - B->Stype != SLU_DN || B->Dtype != SLU_C || - B->Mtype != SLU_GE ) - *info = -13; - else if ( X->ncol < 0 || Xstore->lda < SUPERLU_MAX(0, A->nrow) || - (B->ncol != 0 && B->ncol != X->ncol) || - X->Stype != SLU_DN || - X->Dtype != SLU_C || X->Mtype != SLU_GE ) - *info = -14; - } - } - if (*info != 0) { - i = -(*info); - xerbla_("cgssvx", &i); - return; - } - - /* Initialization for factor parameters */ - panel_size = sp_ienv(1); - relax = sp_ienv(2); - diag_pivot_thresh = options->DiagPivotThresh; - - utime = stat->utime; - - /* Convert A to SLU_NC format when necessary. */ - if ( A->Stype == SLU_NR ) { - NRformat *Astore = A->Store; - AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); - cCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, - Astore->nzval, Astore->colind, Astore->rowptr, - SLU_NC, A->Dtype, A->Mtype); - if ( notran ) { /* Reverse the transpose argument. */ - trant = TRANS; - notran = 0; - } else { - trant = NOTRANS; - notran = 1; - } - } else { /* A->Stype == SLU_NC */ - trant = options->Trans; - AA = A; - } - - if ( nofact && equil ) { - t0 = SuperLU_timer_(); - /* Compute row and column scalings to equilibrate the matrix A. */ - cgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1); - - if ( info1 == 0 ) { - /* Equilibrate matrix A. */ - claqgs(AA, R, C, rowcnd, colcnd, amax, equed); - rowequ = lsame_(equed, "R") || lsame_(equed, "B"); - colequ = lsame_(equed, "C") || lsame_(equed, "B"); - } - utime[EQUIL] = SuperLU_timer_() - t0; - } - - if ( nrhs > 0 ) { - /* Scale the right hand side if equilibration was performed. */ - if ( notran ) { - if ( rowequ ) { - for (j = 0; j < nrhs; ++j) - for (i = 0; i < A->nrow; ++i) { - cs_mult(&Bmat[i+j*ldb], &Bmat[i+j*ldb], R[i]); - } - } - } else if ( colequ ) { - for (j = 0; j < nrhs; ++j) - for (i = 0; i < A->nrow; ++i) { - cs_mult(&Bmat[i+j*ldb], &Bmat[i+j*ldb], C[i]); - } - } - } - - if ( nofact ) { - - t0 = SuperLU_timer_(); - /* - * Gnet column permutation vector perm_c[], according to permc_spec: - * permc_spec = NATURAL: natural ordering - * permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A - * permc_spec = MMD_ATA: minimum degree on structure of A'*A - * permc_spec = COLAMD: approximate minimum degree column ordering - * permc_spec = MY_PERMC: the ordering already supplied in perm_c[] - */ - permc_spec = options->ColPerm; - if ( permc_spec != MY_PERMC && options->Fact == DOFACT ) - get_perm_c(permc_spec, AA, perm_c); - utime[COLPERM] = SuperLU_timer_() - t0; - - t0 = SuperLU_timer_(); - sp_preorder(options, AA, perm_c, etree, &AC); - utime[ETREE] = SuperLU_timer_() - t0; - -/* printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", - relax, panel_size, sp_ienv(3), sp_ienv(4)); - fflush(stdout); */ - - /* Compute the LU factorization of A*Pc. */ - t0 = SuperLU_timer_(); - cgstrf(options, &AC, relax, panel_size, etree, - work, lwork, perm_c, perm_r, L, U, stat, info); - utime[FACT] = SuperLU_timer_() - t0; - - if ( lwork == -1 ) { - mem_usage->total_needed = *info - A->ncol; - return; - } - } - - if ( options->PivotGrowth ) { - if ( *info > 0 ) { - if ( *info <= A->ncol ) { - /* Compute the reciprocal pivot growth factor of the leading - rank-deficient *info columns of A. */ - *recip_pivot_growth = cPivotGrowth(*info, AA, perm_c, L, U); - } - return; - } - - /* Compute the reciprocal pivot growth factor *recip_pivot_growth. */ - *recip_pivot_growth = cPivotGrowth(A->ncol, AA, perm_c, L, U); - } - - if ( options->ConditionNumber ) { - /* Estimate the reciprocal of the condition number of A. */ - t0 = SuperLU_timer_(); - if ( notran ) { - *(unsigned char *)norm = '1'; - } else { - *(unsigned char *)norm = 'I'; - } - anorm = clangs(norm, AA); - cgscon(norm, L, U, anorm, rcond, stat, info); - utime[RCOND] = SuperLU_timer_() - t0; - } - - if ( nrhs > 0 ) { - /* Compute the solution matrix X. */ - for (j = 0; j < nrhs; j++) /* Save a copy of the right hand sides */ - for (i = 0; i < B->nrow; i++) - Xmat[i + j*ldx] = Bmat[i + j*ldb]; - - t0 = SuperLU_timer_(); - cgstrs (trant, L, U, perm_c, perm_r, X, stat, info); - utime[SOLVE] = SuperLU_timer_() - t0; - - /* Use iterative refinement to improve the computed solution and compute - error bounds and backward error estimates for it. */ - t0 = SuperLU_timer_(); - if ( options->IterRefine != NOREFINE ) { - cgsrfs(trant, AA, L, U, perm_c, perm_r, equed, R, C, B, - X, ferr, berr, stat, info); - } else { - for (j = 0; j < nrhs; ++j) ferr[j] = berr[j] = 1.0; - } - utime[REFINE] = SuperLU_timer_() - t0; - - /* Transform the solution matrix X to a solution of the original system. */ - if ( notran ) { - if ( colequ ) { - for (j = 0; j < nrhs; ++j) - for (i = 0; i < A->nrow; ++i) { - cs_mult(&Xmat[i+j*ldx], &Xmat[i+j*ldx], C[i]); - } - } - } else if ( rowequ ) { - for (j = 0; j < nrhs; ++j) - for (i = 0; i < A->nrow; ++i) { - cs_mult(&Xmat[i+j*ldx], &Xmat[i+j*ldx], R[i]); - } - } - } /* end if nrhs > 0 */ - - if ( options->ConditionNumber ) { - /* Set INFO = A->ncol+1 if the matrix is singular to working precision. */ - if ( *rcond < slamch_("E") ) *info = A->ncol + 1; - } - - if ( nofact ) { - cQuerySpace(L, U, mem_usage); - Destroy_CompCol_Permuted(&AC); - } - if ( A->Stype == SLU_NR ) { - Destroy_SuperMatrix_Store(AA); - SUPERLU_FREE(AA); - } - -} diff --git a/thirdparty/superlu/SuperLU_4.1/SRC/clacon.c.bak b/thirdparty/superlu/SuperLU_4.1/SRC/clacon.c.bak deleted file mode 100644 index 3b826f5..0000000 --- a/thirdparty/superlu/SuperLU_4.1/SRC/clacon.c.bak +++ /dev/null @@ -1,221 +0,0 @@ - -/*! @file clacon.c - * \brief Estimates the 1-norm - * - * <pre> - * -- SuperLU routine (version 2.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * November 15, 1997 - * </pre> - */ -#include <math.h> -#include "slu_Cnames.h" -#include "slu_scomplex.h" - -/*! \brief - * - * <pre> - * Purpose - * ======= - * - * CLACON estimates the 1-norm of a square matrix A. - * Reverse communication is used for evaluating matrix-vector products. - * - * - * Arguments - * ========= - * - * N (input) INT - * The order of the matrix. N >= 1. - * - * V (workspace) COMPLEX PRECISION array, dimension (N) - * On the final return, V = A*W, where EST = norm(V)/norm(W) - * (W is not returned). - * - * X (input/output) COMPLEX PRECISION array, dimension (N) - * On an intermediate return, X should be overwritten by - * A * X, if KASE=1, - * A' * X, if KASE=2, - * where A' is the conjugate transpose of A, - * and CLACON must be re-called with all the other parameters - * unchanged. - * - * - * EST (output) FLOAT PRECISION - * An estimate (a lower bound) for norm(A). - * - * KASE (input/output) INT - * On the initial call to CLACON, KASE should be 0. - * On an intermediate return, KASE will be 1 or 2, indicating - * whether X should be overwritten by A * X or A' * X. - * On the final return from CLACON, KASE will again be 0. - * - * Further Details - * ======= ======= - * - * Contributed by Nick Higham, University of Manchester. - * Originally named CONEST, dated March 16, 1988. - * - * Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of - * a real or complex matrix, with applications to condition estimation", - * ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. - * ===================================================================== - * </pre> - */ - -int -clacon_(int *n, complex *v, complex *x, float *est, int *kase) - -{ - - - /* Table of constant values */ - int c__1 = 1; - complex zero = {0.0, 0.0}; - complex one = {1.0, 0.0}; - - /* System generated locals */ - float d__1; - - /* Local variables */ - static int iter; - static int jump, jlast; - static float altsgn, estold; - static int i, j; - float temp; - float safmin; - extern double slamch_(char *); - extern int icmax1_(int *, complex *, int *); - extern double scsum1_(int *, complex *, int *); - - safmin = slamch_("Safe minimum"); - if ( *kase == 0 ) { - for (i = 0; i < *n; ++i) { - x[i].r = 1. / (float) (*n); - x[i].i = 0.; - } - *kase = 1; - jump = 1; - return 0; - } - - switch (jump) { - case 1: goto L20; - case 2: goto L40; - case 3: goto L70; - case 4: goto L110; - case 5: goto L140; - } - - /* ................ ENTRY (JUMP = 1) - FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X. */ - L20: - if (*n == 1) { - v[0] = x[0]; - *est = c_abs(&v[0]); - /* ... QUIT */ - goto L150; - } - *est = scsum1_(n, x, &c__1); - - for (i = 0; i < *n; ++i) { - d__1 = c_abs(&x[i]); - if (d__1 > safmin) { - d__1 = 1 / d__1; - x[i].r *= d__1; - x[i].i *= d__1; - } else { - x[i] = one; - } - } - *kase = 2; - jump = 2; - return 0; - - /* ................ ENTRY (JUMP = 2) - FIRST ITERATION. X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. */ -L40: - j = icmax1_(n, &x[0], &c__1); - --j; - iter = 2; - - /* MAIN LOOP - ITERATIONS 2,3,...,ITMAX. */ -L50: - for (i = 0; i < *n; ++i) x[i] = zero; - x[j] = one; - *kase = 1; - jump = 3; - return 0; - - /* ................ ENTRY (JUMP = 3) - X HAS BEEN OVERWRITTEN BY A*X. */ -L70: -#ifdef _CRAY - CCOPY(n, x, &c__1, v, &c__1); -#else - ccopy_(n, x, &c__1, v, &c__1); -#endif - estold = *est; - *est = scsum1_(n, v, &c__1); - - -L90: - /* TEST FOR CYCLING. */ - if (*est <= estold) goto L120; - - for (i = 0; i < *n; ++i) { - d__1 = c_abs(&x[i]); - if (d__1 > safmin) { - d__1 = 1 / d__1; - x[i].r *= d__1; - x[i].i *= d__1; - } else { - x[i] = one; - } - } - *kase = 2; - jump = 4; - return 0; - - /* ................ ENTRY (JUMP = 4) - X HAS BEEN OVERWRITTEN BY TRANDPOSE(A)*X. */ -L110: - jlast = j; - j = icmax1_(n, &x[0], &c__1); - --j; - if (x[jlast].r != (d__1 = x[j].r, fabs(d__1)) && iter < 5) { - ++iter; - goto L50; - } - - /* ITERATION COMPLETE. FINAL STAGE. */ -L120: - altsgn = 1.; - for (i = 1; i <= *n; ++i) { - x[i-1].r = altsgn * ((float)(i - 1) / (float)(*n - 1) + 1.); - x[i-1].i = 0.; - altsgn = -altsgn; - } - *kase = 1; - jump = 5; - return 0; - - /* ................ ENTRY (JUMP = 5) - X HAS BEEN OVERWRITTEN BY A*X. */ -L140: - temp = scsum1_(n, x, &c__1) / (float)(*n * 3) * 2.; - if (temp > *est) { -#ifdef _CRAY - CCOPY(n, &x[0], &c__1, &v[0], &c__1); -#else - ccopy_(n, &x[0], &c__1, &v[0], &c__1); -#endif - *est = temp; - } - -L150: - *kase = 0; - return 0; - -} /* clacon_ */ diff --git a/thirdparty/superlu/SuperLU_4.1/SRC/dgsitrf.c.bak b/thirdparty/superlu/SuperLU_4.1/SRC/dgsitrf.c.bak deleted file mode 100644 index 969b5c1..0000000 --- a/thirdparty/superlu/SuperLU_4.1/SRC/dgsitrf.c.bak +++ /dev/null @@ -1,626 +0,0 @@ - -/*! @file dgsitf.c - * \brief Computes an ILU factorization of a general sparse matrix - * - * <pre> - * -- SuperLU routine (version 4.0) -- - * Lawrence Berkeley National Laboratory. - * June 30, 2009 - * </pre> - */ - -#include "slu_ddefs.h" - -#ifdef DEBUG -int num_drop_L; -#endif - -/*! \brief - * - * <pre> - * Purpose - * ======= - * - * DGSITRF computes an ILU factorization of a general sparse m-by-n - * matrix A using partial pivoting with row interchanges. - * The factorization has the form - * Pr * A = L * U - * where Pr is a row permutation matrix, L is lower triangular with unit - * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper - * triangular (upper trapezoidal if A->nrow < A->ncol). - * - * See supermatrix.h for the definition of 'SuperMatrix' structure. - * - * Arguments - * ========= - * - * options (input) superlu_options_t* - * The structure defines the input parameters to control - * how the ILU decomposition will be performed. - * - * A (input) SuperMatrix* - * Original matrix A, permuted by columns, of dimension - * (A->nrow, A->ncol). The type of A can be: - * Stype = SLU_NCP; Dtype = SLU_D; Mtype = SLU_GE. - * - * relax (input) int - * To control degree of relaxing supernodes. If the number - * of nodes (columns) in a subtree of the elimination tree is less - * than relax, this subtree is considered as one supernode, - * regardless of the row structures of those columns. - * - * panel_size (input) int - * A panel consists of at most panel_size consecutive columns. - * - * etree (input) int*, dimension (A->ncol) - * Elimination tree of A'*A. - * Note: etree is a vector of parent pointers for a forest whose - * vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol. - * On input, the columns of A should be permuted so that the - * etree is in a certain postorder. - * - * work (input/output) void*, size (lwork) (in bytes) - * User-supplied work space and space for the output data structures. - * Not referenced if lwork = 0; - * - * lwork (input) int - * Specifies the size of work array in bytes. - * = 0: allocate space internally by system malloc; - * > 0: use user-supplied work array of length lwork in bytes, - * returns error if space runs out. - * = -1: the routine guesses the amount of space needed without - * performing the factorization, and returns it in - * *info; no other side effects. - * - * perm_c (input) int*, dimension (A->ncol) - * Column permutation vector, which defines the - * permutation matrix Pc; perm_c[i] = j means column i of A is - * in position j in A*Pc. - * When searching for diagonal, perm_c[*] is applied to the - * row subscripts of A, so that diagonal threshold pivoting - * can find the diagonal of A, rather than that of A*Pc. - * - * perm_r (input/output) int*, dimension (A->nrow) - * Row permutation vector which defines the permutation matrix Pr, - * perm_r[i] = j means row i of A is in position j in Pr*A. - * If options->Fact = SamePattern_SameRowPerm, the pivoting routine - * will try to use the input perm_r, unless a certain threshold - * criterion is violated. In that case, perm_r is overwritten by - * a new permutation determined by partial pivoting or diagonal - * threshold pivoting. - * Otherwise, perm_r is output argument; - * - * L (output) SuperMatrix* - * The factor L from the factorization Pr*A=L*U; use compressed row - * subscripts storage for supernodes, i.e., L has type: - * Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU. - * - * U (output) SuperMatrix* - * The factor U from the factorization Pr*A*Pc=L*U. Use column-wise - * storage scheme, i.e., U has types: Stype = SLU_NC, - * Dtype = SLU_D, Mtype = SLU_TRU. - * - * stat (output) SuperLUStat_t* - * Record the statistics on runtime and floating-point operation count. - * See slu_util.h for the definition of 'SuperLUStat_t'. - * - * info (output) int* - * = 0: successful exit - * < 0: if info = -i, the i-th argument had an illegal value - * > 0: if info = i, and i is - * <= A->ncol: number of zero pivots. They are replaced by small - * entries according to options->ILU_FillTol. - * > A->ncol: number of bytes allocated when memory allocation - * failure occurred, plus A->ncol. If lwork = -1, it is - * the estimated amount of space needed, plus A->ncol. - * - * ====================================================================== - * - * Local Working Arrays: - * ====================== - * m = number of rows in the matrix - * n = number of columns in the matrix - * - * marker[0:3*m-1]: marker[i] = j means that node i has been - * reached when working on column j. - * Storage: relative to original row subscripts - * NOTE: There are 4 of them: - * marker/marker1 are used for panel dfs, see (ilu_)dpanel_dfs.c; - * marker2 is used for inner-factorization, see (ilu)_dcolumn_dfs.c; - * marker_relax(has its own space) is used for relaxed supernodes. - * - * parent[0:m-1]: parent vector used during dfs - * Storage: relative to new row subscripts - * - * xplore[0:m-1]: xplore[i] gives the location of the next (dfs) - * unexplored neighbor of i in lsub[*] - * - * segrep[0:nseg-1]: contains the list of supernodal representatives - * in topological order of the dfs. A supernode representative is the - * last column of a supernode. - * The maximum size of segrep[] is n. - * - * repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a - * supernodal representative r, repfnz[r] is the location of the first - * nonzero in this segment. It is also used during the dfs: repfnz[r]>0 - * indicates the supernode r has been explored. - * NOTE: There are W of them, each used for one column of a panel. - * - * panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below - * the panel diagonal. These are filled in during dpanel_dfs(), and are - * used later in the inner LU factorization within the panel. - * panel_lsub[]/dense[] pair forms the SPA data structure. - * NOTE: There are W of them. - * - * dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values; - * NOTE: there are W of them. - * - * tempv[0:*]: real temporary used for dense numeric kernels; - * The size of this array is defined by NUM_TEMPV() in slu_util.h. - * It is also used by the dropping routine ilu_ddrop_row(). - * </pre> - */ - -void -dgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size, - int *etree, void *work, int lwork, int *perm_c, int *perm_r, - SuperMatrix *L, SuperMatrix *U, SuperLUStat_t *stat, int *info) -{ - /* Local working arrays */ - NCPformat *Astore; - int *iperm_r = NULL; /* inverse of perm_r; used when - options->Fact == SamePattern_SameRowPerm */ - int *iperm_c; /* inverse of perm_c */ - int *swap, *iswap; /* swap is used to store the row permutation - during the factorization. Initially, it is set - to iperm_c (row indeces of Pc*A*Pc'). - iswap is the inverse of swap. After the - factorization, it is equal to perm_r. */ - int *iwork; - double *dwork; - int *segrep, *repfnz, *parent, *xplore; - int *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */ - int *marker, *marker_relax; - double *dense, *tempv; - int *relax_end, *relax_fsupc; - double *a; - int *asub; - int *xa_begin, *xa_end; - int *xsup, *supno; - int *xlsub, *xlusup, *xusub; - int nzlumax; - double *amax; - double drop_sum; - static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */ - int *iwork2; /* used by the second dropping rule */ - - /* Local scalars */ - fact_t fact = options->Fact; - double diag_pivot_thresh = options->DiagPivotThresh; - double drop_tol = options->ILU_DropTol; /* tau */ - double fill_ini = options->ILU_FillTol; /* tau^hat */ - double gamma = options->ILU_FillFactor; - int drop_rule = options->ILU_DropRule; - milu_t milu = options->ILU_MILU; - double fill_tol; - int pivrow; /* pivotal row number in the original matrix A */ - int nseg1; /* no of segments in U-column above panel row jcol */ - int nseg; /* no of segments in each U-column */ - register int jcol; - register int kcol; /* end column of a relaxed snode */ - register int icol; - register int i, k, jj, new_next, iinfo; - int m, n, min_mn, jsupno, fsupc, nextlu, nextu; - int w_def; /* upper bound on panel width */ - int usepr, iperm_r_allocated = 0; - int nnzL, nnzU; - int *panel_histo = stat->panel_histo; - flops_t *ops = stat->ops; - - int last_drop;/* the last column which the dropping rules applied */ - int quota; - int nnzAj; /* number of nonzeros in A(:,1:j) */ - int nnzLj, nnzUj; - double tol_L = drop_tol, tol_U = drop_tol; - double zero = 0.0; - - /* Executable */ - iinfo = 0; - m = A->nrow; - n = A->ncol; - min_mn = SUPERLU_MIN(m, n); - Astore = A->Store; - a = Astore->nzval; - asub = Astore->rowind; - xa_begin = Astore->colbeg; - xa_end = Astore->colend; - - /* Allocate storage common to the factor routines */ - *info = dLUMemInit(fact, work, lwork, m, n, Astore->nnz, panel_size, - gamma, L, U, &Glu, &iwork, &dwork); - if ( *info ) return; - - xsup = Glu.xsup; - supno = Glu.supno; - xlsub = Glu.xlsub; - xlusup = Glu.xlusup; - xusub = Glu.xusub; - - SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore, - &repfnz, &panel_lsub, &marker_relax, &marker); - dSetRWork(m, panel_size, dwork, &dense, &tempv); - - usepr = (fact == SamePattern_SameRowPerm); - if ( usepr ) { - /* Compute the inverse of perm_r */ - iperm_r = (int *) intMalloc(m); - for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k; - iperm_r_allocated = 1; - } - - iperm_c = (int *) intMalloc(n); - for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k; - swap = (int *)intMalloc(n); - for (k = 0; k < n; k++) swap[k] = iperm_c[k]; - iswap = (int *)intMalloc(n); - for (k = 0; k < n; k++) iswap[k] = perm_c[k]; - amax = (double *) doubleMalloc(panel_size); - if (drop_rule & DROP_SECONDARY) - iwork2 = (int *)intMalloc(n); - else - iwork2 = NULL; - - nnzAj = 0; - nnzLj = 0; - nnzUj = 0; - last_drop = SUPERLU_MAX(min_mn - 2 * sp_ienv(7), (int)(min_mn * 0.95)); - - /* Identify relaxed snodes */ - relax_end = (int *) intMalloc(n); - relax_fsupc = (int *) intMalloc(n); - if ( options->SymmetricMode == YES ) - ilu_heap_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc); - else - ilu_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc); - - ifill (perm_r, m, EMPTY); - ifill (marker, m * NO_MARKER, EMPTY); - supno[0] = -1; - xsup[0] = xlsub[0] = xusub[0] = xlusup[0] = 0; - w_def = panel_size; - - /* Mark the rows used by relaxed supernodes */ - ifill (marker_relax, m, EMPTY); - i = mark_relax(m, relax_end, relax_fsupc, xa_begin, xa_end, - asub, marker_relax); -#if ( PRNTlevel >= 1) - printf("%d relaxed supernodes.\n", i); -#endif - - /* - * Work on one "panel" at a time. A panel is one of the following: - * (a) a relaxed supernode at the bottom of the etree, or - * (b) panel_size contiguous columns, defined by the user - */ - for (jcol = 0; jcol < min_mn; ) { - - if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */ - kcol = relax_end[jcol]; /* end of the relaxed snode */ - panel_histo[kcol-jcol+1]++; - - /* Drop small rows in the previous supernode. */ - if (jcol > 0 && jcol < last_drop) { - int first = xsup[supno[jcol - 1]]; - int last = jcol - 1; - int quota; - - /* Compute the quota */ - if (drop_rule & DROP_PROWS) - quota = gamma * Astore->nnz / m * (m - first) / m - * (last - first + 1); - else if (drop_rule & DROP_COLUMN) { - int i; - quota = 0; - for (i = first; i <= last; i++) - quota += xa_end[i] - xa_begin[i]; - quota = gamma * quota * (m - first) / m; - } else if (drop_rule & DROP_AREA) - quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m) - - nnzLj; - else - quota = m * n; - fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / min_mn); - - /* Drop small rows */ - i = ilu_ddrop_row(options, first, last, tol_L, quota, &nnzLj, - &fill_tol, &Glu, tempv, iwork2, 0); - /* Reset the parameters */ - if (drop_rule & DROP_DYNAMIC) { - if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m) - < nnzLj) - tol_L = SUPERLU_MIN(1.0, tol_L * 2.0); - else - tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5); - } - if (fill_tol < 0) iinfo -= (int)fill_tol; -#ifdef DEBUG - num_drop_L += i * (last - first + 1); -#endif - } - - /* -------------------------------------- - * Factorize the relaxed supernode(jcol:kcol) - * -------------------------------------- */ - /* Determine the union of the row structure of the snode */ - if ( (*info = ilu_dsnode_dfs(jcol, kcol, asub, xa_begin, xa_end, - marker, &Glu)) != 0 ) - return; - - nextu = xusub[jcol]; - nextlu = xlusup[jcol]; - jsupno = supno[jcol]; - fsupc = xsup[jsupno]; - new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1); - nzlumax = Glu.nzlumax; - while ( new_next > nzlumax ) { - if ((*info = dLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu))) - return; - } - - for (icol = jcol; icol <= kcol; icol++) { - xusub[icol+1] = nextu; - - amax[0] = 0.0; - /* Scatter into SPA dense[*] */ - for (k = xa_begin[icol]; k < xa_end[icol]; k++) { - register double tmp = fabs(a[k]); - if (tmp > amax[0]) amax[0] = tmp; - dense[asub[k]] = a[k]; - } - nnzAj += xa_end[icol] - xa_begin[icol]; - if (amax[0] == 0.0) { - amax[0] = fill_ini; -#if ( PRNTlevel >= 1) - printf("Column %d is entirely zero!\n", icol); - fflush(stdout); -#endif - } - - /* Numeric update within the snode */ - dsnode_bmod(icol, jsupno, fsupc, dense, tempv, &Glu, stat); - - if (usepr) pivrow = iperm_r[icol]; - fill_tol = pow(fill_ini, 1.0 - (double)icol / (double)min_mn); - if ( (*info = ilu_dpivotL(icol, diag_pivot_thresh, &usepr, - perm_r, iperm_c[icol], swap, iswap, - marker_relax, &pivrow, - amax[0] * fill_tol, milu, zero, - &Glu, stat)) ) { - iinfo++; - marker[pivrow] = kcol; - } - - } - - jcol = kcol + 1; - - } else { /* Work on one panel of panel_size columns */ - - /* Adjust panel_size so that a panel won't overlap with the next - * relaxed snode. - */ - panel_size = w_def; - for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++) - if ( relax_end[k] != EMPTY ) { - panel_size = k - jcol; - break; - } - if ( k == min_mn ) panel_size = min_mn - jcol; - panel_histo[panel_size]++; - - /* symbolic factor on a panel of columns */ - ilu_dpanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1, - dense, amax, panel_lsub, segrep, repfnz, - marker, parent, xplore, &Glu); - - /* numeric sup-panel updates in topological order */ - dpanel_bmod(m, panel_size, jcol, nseg1, dense, - tempv, segrep, repfnz, &Glu, stat); - - /* Sparse LU within the panel, and below panel diagonal */ - for (jj = jcol; jj < jcol + panel_size; jj++) { - - k = (jj - jcol) * m; /* column index for w-wide arrays */ - - nseg = nseg1; /* Begin after all the panel segments */ - - nnzAj += xa_end[jj] - xa_begin[jj]; - - if ((*info = ilu_dcolumn_dfs(m, jj, perm_r, &nseg, - &panel_lsub[k], segrep, &repfnz[k], - marker, parent, xplore, &Glu))) - return; - - /* Numeric updates */ - if ((*info = dcolumn_bmod(jj, (nseg - nseg1), &dense[k], - tempv, &segrep[nseg1], &repfnz[k], - jcol, &Glu, stat)) != 0) return; - - /* Make a fill-in position if the column is entirely zero */ - if (xlsub[jj + 1] == xlsub[jj]) { - register int i, row; - int nextl; - int nzlmax = Glu.nzlmax; - int *lsub = Glu.lsub; - int *marker2 = marker + 2 * m; - - /* Allocate memory */ - nextl = xlsub[jj] + 1; - if (nextl >= nzlmax) { - int error = dLUMemXpand(jj, nextl, LSUB, &nzlmax, &Glu); - if (error) { *info = error; return; } - lsub = Glu.lsub; - } - xlsub[jj + 1]++; - assert(xlusup[jj]==xlusup[jj+1]); - xlusup[jj + 1]++; - Glu.lusup[xlusup[jj]] = zero; - - /* Choose a row index (pivrow) for fill-in */ - for (i = jj; i < n; i++) - if (marker_relax[swap[i]] <= jj) break; - row = swap[i]; - marker2[row] = jj; - lsub[xlsub[jj]] = row; -#ifdef DEBUG - printf("Fill col %d.\n", jj); - fflush(stdout); -#endif - } - - /* Computer the quota */ - if (drop_rule & DROP_PROWS) - quota = gamma * Astore->nnz / m * jj / m; - else if (drop_rule & DROP_COLUMN) - quota = gamma * (xa_end[jj] - xa_begin[jj]) * - (jj + 1) / m; - else if (drop_rule & DROP_AREA) - quota = gamma * 0.9 * nnzAj * 0.5 - nnzUj; - else - quota = m; - - /* Copy the U-segments to ucol[*] and drop small entries */ - if ((*info = ilu_dcopy_to_ucol(jj, nseg, segrep, &repfnz[k], - perm_r, &dense[k], drop_rule, - milu, amax[jj - jcol] * tol_U, - quota, &drop_sum, &nnzUj, &Glu, - iwork2)) != 0) - return; - - /* Reset the dropping threshold if required */ - if (drop_rule & DROP_DYNAMIC) { - if (gamma * 0.9 * nnzAj * 0.5 < nnzLj) - tol_U = SUPERLU_MIN(1.0, tol_U * 2.0); - else - tol_U = SUPERLU_MAX(drop_tol, tol_U * 0.5); - } - - drop_sum *= MILU_ALPHA; - if (usepr) pivrow = iperm_r[jj]; - fill_tol = pow(fill_ini, 1.0 - (double)jj / (double)min_mn); - if ( (*info = ilu_dpivotL(jj, diag_pivot_thresh, &usepr, perm_r, - iperm_c[jj], swap, iswap, - marker_relax, &pivrow, - amax[jj - jcol] * fill_tol, milu, - drop_sum, &Glu, stat)) ) { - iinfo++; - marker[m + pivrow] = jj; - marker[2 * m + pivrow] = jj; - } - - /* Reset repfnz[] for this column */ - resetrep_col (nseg, segrep, &repfnz[k]); - - /* Start a new supernode, drop the previous one */ - if (jj > 0 && supno[jj] > supno[jj - 1] && jj < last_drop) { - int first = xsup[supno[jj - 1]]; - int last = jj - 1; - int quota; - - /* Compute the quota */ - if (drop_rule & DROP_PROWS) - quota = gamma * Astore->nnz / m * (m - first) / m - * (last - first + 1); - else if (drop_rule & DROP_COLUMN) { - int i; - quota = 0; - for (i = first; i <= last; i++) - quota += xa_end[i] - xa_begin[i]; - quota = gamma * quota * (m - first) / m; - } else if (drop_rule & DROP_AREA) - quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) - / m) - nnzLj; - else - quota = m * n; - fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / - (double)min_mn); - - /* Drop small rows */ - i = ilu_ddrop_row(options, first, last, tol_L, quota, - &nnzLj, &fill_tol, &Glu, tempv, iwork2, - 1); - - /* Reset the parameters */ - if (drop_rule & DROP_DYNAMIC) { - if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m) - < nnzLj) - tol_L = SUPERLU_MIN(1.0, tol_L * 2.0); - else - tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5); - } - if (fill_tol < 0) iinfo -= (int)fill_tol; -#ifdef DEBUG - num_drop_L += i * (last - first + 1); -#endif - } /* if start a new supernode */ - - } /* for */ - - jcol += panel_size; /* Move to the next panel */ - - } /* else */ - - } /* for */ - - *info = iinfo; - - if ( m > n ) { - k = 0; - for (i = 0; i < m; ++i) - if ( perm_r[i] == EMPTY ) { - perm_r[i] = n + k; - ++k; - } - } - - ilu_countnz(min_mn, &nnzL, &nnzU, &Glu); - fixupL(min_mn, perm_r, &Glu); - - dLUWorkFree(iwork, dwork, &Glu); /* Free work space and compress storage */ - - if ( fact == SamePattern_SameRowPerm ) { - /* L and U structures may have changed due to possibly different - pivoting, even though the storage is available. - There could also be memory expansions, so the array locations - may have changed, */ - ((SCformat *)L->Store)->nnz = nnzL; - ((SCformat *)L->Store)->nsuper = Glu.supno[n]; - ((SCformat *)L->Store)->nzval = Glu.lusup; - ((SCformat *)L->Store)->nzval_colptr = Glu.xlusup; - ((SCformat *)L->Store)->rowind = Glu.lsub; - ((SCformat *)L->Store)->rowind_colptr = Glu.xlsub; - ((NCformat *)U->Store)->nnz = nnzU; - ((NCformat *)U->Store)->nzval = Glu.ucol; - ((NCformat *)U->Store)->rowind = Glu.usub; - ((NCformat *)U->Store)->colptr = Glu.xusub; - } else { - dCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup, - Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno, - Glu.xsup, SLU_SC, SLU_D, SLU_TRLU); - dCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol, - Glu.usub, Glu.xusub, SLU_NC, SLU_D, SLU_TRU); - } - - ops[FACT] += ops[TRSV] + ops[GEMV]; - stat->expansions = --(Glu.num_expansions); - - if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r); - SUPERLU_FREE (iperm_c); - SUPERLU_FREE (relax_end); - SUPERLU_FREE (swap); - SUPERLU_FREE (iswap); - SUPERLU_FREE (relax_fsupc); - SUPERLU_FREE (amax); - if ( iwork2 ) SUPERLU_FREE (iwork2); - -} diff --git a/thirdparty/superlu/SuperLU_4.1/SRC/dlacon.c.bak b/thirdparty/superlu/SuperLU_4.1/SRC/dlacon.c.bak deleted file mode 100644 index 951fe7a..0000000 --- a/thirdparty/superlu/SuperLU_4.1/SRC/dlacon.c.bak +++ /dev/null @@ -1,236 +0,0 @@ - -/*! @file dlacon.c - * \brief Estimates the 1-norm - * - * <pre> - * -- SuperLU routine (version 2.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * November 15, 1997 - * </pre> - */ -#include <math.h> -#include "slu_Cnames.h" - -/*! \brief - * - * <pre> - * Purpose - * ======= - * - * DLACON estimates the 1-norm of a square matrix A. - * Reverse communication is used for evaluating matrix-vector products. - * - * - * Arguments - * ========= - * - * N (input) INT - * The order of the matrix. N >= 1. - * - * V (workspace) DOUBLE PRECISION array, dimension (N) - * On the final return, V = A*W, where EST = norm(V)/norm(W) - * (W is not returned). - * - * X (input/output) DOUBLE PRECISION array, dimension (N) - * On an intermediate return, X should be overwritten by - * A * X, if KASE=1, - * A' * X, if KASE=2, - * and DLACON must be re-called with all the other parameters - * unchanged. - * - * ISGN (workspace) INT array, dimension (N) - * - * EST (output) DOUBLE PRECISION - * An estimate (a lower bound) for norm(A). - * - * KASE (input/output) INT - * On the initial call to DLACON, KASE should be 0. - * On an intermediate return, KASE will be 1 or 2, indicating - * whether X should be overwritten by A * X or A' * X. - * On the final return from DLACON, KASE will again be 0. - * - * Further Details - * ======= ======= - * - * Contributed by Nick Higham, University of Manchester. - * Originally named CONEST, dated March 16, 1988. - * - * Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of - * a real or complex matrix, with applications to condition estimation", - * ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. - * ===================================================================== - * </pre> - */ - -int -dlacon_(int *n, double *v, double *x, int *isgn, double *est, int *kase) - -{ - - - /* Table of constant values */ - int c__1 = 1; - double zero = 0.0; - double one = 1.0; - - /* Local variables */ - static int iter; - static int jump, jlast; - static double altsgn, estold; - static int i, j; - double temp; -#ifdef _CRAY - extern int ISAMAX(int *, double *, int *); - extern double SASUM(int *, double *, int *); - extern int SCOPY(int *, double *, int *, double *, int *); -#else - extern int idamax_(int *, double *, int *); - extern double dasum_(int *, double *, int *); - extern int dcopy_(int *, double *, int *, double *, int *); -#endif -#define d_sign(a, b) (b >= 0 ? fabs(a) : -fabs(a)) /* Copy sign */ -#define i_dnnt(a) \ - ( a>=0 ? floor(a+.5) : -floor(.5-a) ) /* Round to nearest integer */ - - if ( *kase == 0 ) { - for (i = 0; i < *n; ++i) { - x[i] = 1. / (double) (*n); - } - *kase = 1; - jump = 1; - return 0; - } - - switch (jump) { - case 1: goto L20; - case 2: goto L40; - case 3: goto L70; - case 4: goto L110; - case 5: goto L140; - } - - /* ................ ENTRY (JUMP = 1) - FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X. */ - L20: - if (*n == 1) { - v[0] = x[0]; - *est = fabs(v[0]); - /* ... QUIT */ - goto L150; - } -#ifdef _CRAY - *est = SASUM(n, x, &c__1); -#else - *est = dasum_(n, x, &c__1); -#endif - - for (i = 0; i < *n; ++i) { - x[i] = d_sign(one, x[i]); - isgn[i] = i_dnnt(x[i]); - } - *kase = 2; - jump = 2; - return 0; - - /* ................ ENTRY (JUMP = 2) - FIRST ITERATION. X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. */ -L40: -#ifdef _CRAY - j = ISAMAX(n, &x[0], &c__1); -#else - j = idamax_(n, &x[0], &c__1); -#endif - --j; - iter = 2; - - /* MAIN LOOP - ITERATIONS 2,3,...,ITMAX. */ -L50: - for (i = 0; i < *n; ++i) x[i] = zero; - x[j] = one; - *kase = 1; - jump = 3; - return 0; - - /* ................ ENTRY (JUMP = 3) - X HAS BEEN OVERWRITTEN BY A*X. */ -L70: -#ifdef _CRAY - SCOPY(n, x, &c__1, v, &c__1); -#else - dcopy_(n, x, &c__1, v, &c__1); -#endif - estold = *est; -#ifdef _CRAY - *est = SASUM(n, v, &c__1); -#else - *est = dasum_(n, v, &c__1); -#endif - - for (i = 0; i < *n; ++i) - if (i_dnnt(d_sign(one, x[i])) != isgn[i]) - goto L90; - - /* REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED. */ - goto L120; - -L90: - /* TEST FOR CYCLING. */ - if (*est <= estold) goto L120; - - for (i = 0; i < *n; ++i) { - x[i] = d_sign(one, x[i]); - isgn[i] = i_dnnt(x[i]); - } - *kase = 2; - jump = 4; - return 0; - - /* ................ ENTRY (JUMP = 4) - X HAS BEEN OVERWRITTEN BY TRANDPOSE(A)*X. */ -L110: - jlast = j; -#ifdef _CRAY - j = ISAMAX(n, &x[0], &c__1); -#else - j = idamax_(n, &x[0], &c__1); -#endif - --j; - if (x[jlast] != fabs(x[j]) && iter < 5) { - ++iter; - goto L50; - } - - /* ITERATION COMPLETE. FINAL STAGE. */ -L120: - altsgn = 1.; - for (i = 1; i <= *n; ++i) { - x[i-1] = altsgn * ((double)(i - 1) / (double)(*n - 1) + 1.); - altsgn = -altsgn; - } - *kase = 1; - jump = 5; - return 0; - - /* ................ ENTRY (JUMP = 5) - X HAS BEEN OVERWRITTEN BY A*X. */ -L140: -#ifdef _CRAY - temp = SASUM(n, x, &c__1) / (double)(*n * 3) * 2.; -#else - temp = dasum_(n, x, &c__1) / (double)(*n * 3) * 2.; -#endif - if (temp > *est) { -#ifdef _CRAY - SCOPY(n, &x[0], &c__1, &v[0], &c__1); -#else - dcopy_(n, &x[0], &c__1, &v[0], &c__1); -#endif - *est = temp; - } - -L150: - *kase = 0; - return 0; - -} /* dlacon_ */ diff --git a/thirdparty/superlu/SuperLU_4.1/SRC/ilu_ccopy_to_ucol.c.bak b/thirdparty/superlu/SuperLU_4.1/SRC/ilu_ccopy_to_ucol.c.bak deleted file mode 100644 index ba32096..0000000 --- a/thirdparty/superlu/SuperLU_4.1/SRC/ilu_ccopy_to_ucol.c.bak +++ /dev/null @@ -1,202 +0,0 @@ - -/*! @file ilu_ccopy_to_ucol.c - * \brief Copy a computed column of U to the compressed data structure - * and drop some small entries - * - * <pre> - * -- SuperLU routine (version 4.0) -- - * Lawrence Berkeley National Laboratory - * June 30, 2009 - * </pre> - */ - -#include "slu_cdefs.h" - -#ifdef DEBUG -int num_drop_U; -#endif - -static complex *A; /* used in _compare_ only */ -static int _compare_(const void *a, const void *b) -{ - register int *x = (int *)a, *y = (int *)b; - register float xx = c_abs1(&A[*x]), yy = c_abs1(&A[*y]); - if (xx > yy) return -1; - else if (xx < yy) return 1; - else return 0; -} - - -int -ilu_ccopy_to_ucol( - int jcol, /* in */ - int nseg, /* in */ - int *segrep, /* in */ - int *repfnz, /* in */ - int *perm_r, /* in */ - complex *dense, /* modified - reset to zero on return */ - int drop_rule,/* in */ - milu_t milu, /* in */ - double drop_tol, /* in */ - int quota, /* maximum nonzero entries allowed */ - complex *sum, /* out - the sum of dropped entries */ - int *nnzUj, /* in - out */ - GlobalLU_t *Glu, /* modified */ - int *work /* working space with minimum size n, - * used by the second dropping rule */ - ) -{ -/* - * Gather from SPA dense[*] to global ucol[*]. - */ - int ksub, krep, ksupno; - int i, k, kfnz, segsze; - int fsupc, isub, irow; - int jsupno, nextu; - int new_next, mem_error; - int *xsup, *supno; - int *lsub, *xlsub; - complex *ucol; - int *usub, *xusub; - int nzumax; - int m; /* number of entries in the nonzero U-segments */ - register float d_max = 0.0, d_min = 1.0 / dlamch_("Safe minimum"); - register double tmp; - complex zero = {0.0, 0.0}; - - xsup = Glu->xsup; - supno = Glu->supno; - lsub = Glu->lsub; - xlsub = Glu->xlsub; - ucol = Glu->ucol; - usub = Glu->usub; - xusub = Glu->xusub; - nzumax = Glu->nzumax; - - *sum = zero; - if (drop_rule == NODROP) { - drop_tol = -1.0, quota = Glu->n; - } - - jsupno = supno[jcol]; - nextu = xusub[jcol]; - k = nseg - 1; - for (ksub = 0; ksub < nseg; ksub++) { - krep = segrep[k--]; - ksupno = supno[krep]; - - if ( ksupno != jsupno ) { /* Should go into ucol[] */ - kfnz = repfnz[krep]; - if ( kfnz != EMPTY ) { /* Nonzero U-segment */ - - fsupc = xsup[ksupno]; - isub = xlsub[fsupc] + kfnz - fsupc; - segsze = krep - kfnz + 1; - - new_next = nextu + segsze; - while ( new_next > nzumax ) { - if ((mem_error = cLUMemXpand(jcol, nextu, UCOL, &nzumax, - Glu)) != 0) - return (mem_error); - ucol = Glu->ucol; - if ((mem_error = cLUMemXpand(jcol, nextu, USUB, &nzumax, - Glu)) != 0) - return (mem_error); - usub = Glu->usub; - lsub = Glu->lsub; - } - - for (i = 0; i < segsze; i++) { - irow = lsub[isub++]; - tmp = c_abs1(&dense[irow]); - - /* first dropping rule */ - if (quota > 0 && tmp >= drop_tol) { - if (tmp > d_max) d_max = tmp; - if (tmp < d_min) d_min = tmp; - usub[nextu] = perm_r[irow]; - ucol[nextu] = dense[irow]; - nextu++; - } else { - switch (milu) { - case SMILU_1: - case SMILU_2: - c_add(sum, sum, &dense[irow]); - break; - case SMILU_3: - /* *sum += fabs(dense[irow]);*/ - sum->r += tmp; - break; - case SILU: - default: - break; - } -#ifdef DEBUG - num_drop_U++; -#endif - } - dense[irow] = zero; - } - - } - - } - - } /* for each segment... */ - - xusub[jcol + 1] = nextu; /* Close U[*,jcol] */ - m = xusub[jcol + 1] - xusub[jcol]; - - /* second dropping rule */ - if (drop_rule & DROP_SECONDARY && m > quota) { - register double tol = d_max; - register int m0 = xusub[jcol] + m - 1; - - if (quota > 0) { - if (drop_rule & DROP_INTERP) { - d_max = 1.0 / d_max; d_min = 1.0 / d_min; - tol = 1.0 / (d_max + (d_min - d_max) * quota / m); - } else { - A = &ucol[xusub[jcol]]; - for (i = 0; i < m; i++) work[i] = i; - qsort(work, m, sizeof(int), _compare_); - tol = fabs(usub[xusub[jcol] + work[quota]]); - } - } - for (i = xusub[jcol]; i <= m0; ) { - if (c_abs1(&ucol[i]) <= tol) { - switch (milu) { - case SMILU_1: - case SMILU_2: - c_add(sum, sum, &ucol[i]); - break; - case SMILU_3: - sum->r += tmp; - break; - case SILU: - default: - break; - } - ucol[i] = ucol[m0]; - usub[i] = usub[m0]; - m0--; - m--; -#ifdef DEBUG - num_drop_U++; -#endif - xusub[jcol + 1]--; - continue; - } - i++; - } - } - - if (milu == SMILU_2) { - sum->r = c_abs1(sum); sum->i = 0.0; - } - if (milu == SMILU_3) sum->i = 0.0; - - *nnzUj += m; - - return 0; -} diff --git a/thirdparty/superlu/SuperLU_4.1/SRC/ilu_dcopy_to_ucol.c.bak b/thirdparty/superlu/SuperLU_4.1/SRC/ilu_dcopy_to_ucol.c.bak deleted file mode 100644 index a27a148..0000000 --- a/thirdparty/superlu/SuperLU_4.1/SRC/ilu_dcopy_to_ucol.c.bak +++ /dev/null @@ -1,199 +0,0 @@ - -/*! @file ilu_dcopy_to_ucol.c - * \brief Copy a computed column of U to the compressed data structure - * and drop some small entries - * - * <pre> - * -- SuperLU routine (version 4.0) -- - * Lawrence Berkeley National Laboratory - * June 30, 2009 - * </pre> - */ - -#include "slu_ddefs.h" - -#ifdef DEBUG -int num_drop_U; -#endif - -static double *A; /* used in _compare_ only */ -static int _compare_(const void *a, const void *b) -{ - register int *x = (int *)a, *y = (int *)b; - register double xx = fabs(A[*x]), yy = fabs(A[*y]); - if (xx > yy) return -1; - else if (xx < yy) return 1; - else return 0; -} - - -int -ilu_dcopy_to_ucol( - int jcol, /* in */ - int nseg, /* in */ - int *segrep, /* in */ - int *repfnz, /* in */ - int *perm_r, /* in */ - double *dense, /* modified - reset to zero on return */ - int drop_rule,/* in */ - milu_t milu, /* in */ - double drop_tol, /* in */ - int quota, /* maximum nonzero entries allowed */ - double *sum, /* out - the sum of dropped entries */ - int *nnzUj, /* in - out */ - GlobalLU_t *Glu, /* modified */ - int *work /* working space with minimum size n, - * used by the second dropping rule */ - ) -{ -/* - * Gather from SPA dense[*] to global ucol[*]. - */ - int ksub, krep, ksupno; - int i, k, kfnz, segsze; - int fsupc, isub, irow; - int jsupno, nextu; - int new_next, mem_error; - int *xsup, *supno; - int *lsub, *xlsub; - double *ucol; - int *usub, *xusub; - int nzumax; - int m; /* number of entries in the nonzero U-segments */ - register double d_max = 0.0, d_min = 1.0 / dlamch_("Safe minimum"); - register double tmp; - double zero = 0.0; - - xsup = Glu->xsup; - supno = Glu->supno; - lsub = Glu->lsub; - xlsub = Glu->xlsub; - ucol = Glu->ucol; - usub = Glu->usub; - xusub = Glu->xusub; - nzumax = Glu->nzumax; - - *sum = zero; - if (drop_rule == NODROP) { - drop_tol = -1.0, quota = Glu->n; - } - - jsupno = supno[jcol]; - nextu = xusub[jcol]; - k = nseg - 1; - for (ksub = 0; ksub < nseg; ksub++) { - krep = segrep[k--]; - ksupno = supno[krep]; - - if ( ksupno != jsupno ) { /* Should go into ucol[] */ - kfnz = repfnz[krep]; - if ( kfnz != EMPTY ) { /* Nonzero U-segment */ - - fsupc = xsup[ksupno]; - isub = xlsub[fsupc] + kfnz - fsupc; - segsze = krep - kfnz + 1; - - new_next = nextu + segsze; - while ( new_next > nzumax ) { - if ((mem_error = dLUMemXpand(jcol, nextu, UCOL, &nzumax, - Glu)) != 0) - return (mem_error); - ucol = Glu->ucol; - if ((mem_error = dLUMemXpand(jcol, nextu, USUB, &nzumax, - Glu)) != 0) - return (mem_error); - usub = Glu->usub; - lsub = Glu->lsub; - } - - for (i = 0; i < segsze; i++) { - irow = lsub[isub++]; - tmp = fabs(dense[irow]); - - /* first dropping rule */ - if (quota > 0 && tmp >= drop_tol) { - if (tmp > d_max) d_max = tmp; - if (tmp < d_min) d_min = tmp; - usub[nextu] = perm_r[irow]; - ucol[nextu] = dense[irow]; - nextu++; - } else { - switch (milu) { - case SMILU_1: - case SMILU_2: - *sum += dense[irow]; - break; - case SMILU_3: - /* *sum += fabs(dense[irow]);*/ - *sum += tmp; - break; - case SILU: - default: - break; - } -#ifdef DEBUG - num_drop_U++; -#endif - } - dense[irow] = zero; - } - - } - - } - - } /* for each segment... */ - - xusub[jcol + 1] = nextu; /* Close U[*,jcol] */ - m = xusub[jcol + 1] - xusub[jcol]; - - /* second dropping rule */ - if (drop_rule & DROP_SECONDARY && m > quota) { - register double tol = d_max; - register int m0 = xusub[jcol] + m - 1; - - if (quota > 0) { - if (drop_rule & DROP_INTERP) { - d_max = 1.0 / d_max; d_min = 1.0 / d_min; - tol = 1.0 / (d_max + (d_min - d_max) * quota / m); - } else { - A = &ucol[xusub[jcol]]; - for (i = 0; i < m; i++) work[i] = i; - qsort(work, m, sizeof(int), _compare_); - tol = fabs(usub[xusub[jcol] + work[quota]]); - } - } - for (i = xusub[jcol]; i <= m0; ) { - if (fabs(ucol[i]) <= tol) { - switch (milu) { - case SMILU_1: - case SMILU_2: - *sum += ucol[i]; - break; - case SMILU_3: - *sum += fabs(ucol[i]); - break; - case SILU: - default: - break; - } - ucol[i] = ucol[m0]; - usub[i] = usub[m0]; - m0--; - m--; -#ifdef DEBUG - num_drop_U++; -#endif - xusub[jcol + 1]--; - continue; - } - i++; - } - } - - if (milu == SMILU_2) *sum = fabs(*sum); - - *nnzUj += m; - - return 0; -} diff --git a/thirdparty/superlu/SuperLU_4.1/SRC/ilu_ddrop_row.c.bak b/thirdparty/superlu/SuperLU_4.1/SRC/ilu_ddrop_row.c.bak deleted file mode 100644 index caffa13..0000000 --- a/thirdparty/superlu/SuperLU_4.1/SRC/ilu_ddrop_row.c.bak +++ /dev/null @@ -1,311 +0,0 @@ - -/*! @file ilu_ddrop_row.c - * \brief Drop small rows from L - * - * <pre> - * -- SuperLU routine (version 4.0) -- - * Lawrence Berkeley National Laboratory. - * June 30, 2009 - * <\pre> - */ - -#include <math.h> -#include <stdlib.h> -#include "slu_ddefs.h" - -extern void dswap_(int *, double [], int *, double [], int *); -extern void daxpy_(int *, double *, double [], int *, double [], int *); - -static double *A; /* used in _compare_ only */ -static int _compare_(const void *a, const void *b) -{ - register int *x = (int *)a, *y = (int *)b; - if (A[*x] - A[*y] > 0.0) return -1; - else if (A[*x] - A[*y] < 0.0) return 1; - else return 0; -} - -/*! \brief - * <pre> - * Purpose - * ======= - * ilu_ddrop_row() - Drop some small rows from the previous - * supernode (L-part only). - * </pre> - */ -int ilu_ddrop_row( - superlu_options_t *options, /* options */ - int first, /* index of the first column in the supernode */ - int last, /* index of the last column in the supernode */ - double drop_tol, /* dropping parameter */ - int quota, /* maximum nonzero entries allowed */ - int *nnzLj, /* in/out number of nonzeros in L(:, 1:last) */ - double *fill_tol, /* in/out - on exit, fill_tol=-num_zero_pivots, - * does not change if options->ILU_MILU != SMILU1 */ - GlobalLU_t *Glu, /* modified */ - double dwork[], /* working space with minimum size last-first+1 */ - int iwork[], /* working space with minimum size m - n, - * used by the second dropping rule */ - int lastc /* if lastc == 0, there is nothing after the - * working supernode [first:last]; - * if lastc == 1, there is one more column after - * the working supernode. */ ) -{ - register int i, j, k, m1; - register int nzlc; /* number of nonzeros in column last+1 */ - register int xlusup_first, xlsub_first; - int m, n; /* m x n is the size of the supernode */ - int r = 0; /* number of dropped rows */ - register double *temp; - register double *lusup = Glu->lusup; - register int *lsub = Glu->lsub; - register int *xlsub = Glu->xlsub; - register int *xlusup = Glu->xlusup; - register double d_max = 0.0, d_min = 1.0; - int drop_rule = options->ILU_DropRule; - milu_t milu = options->ILU_MILU; - norm_t nrm = options->ILU_Norm; - double zero = 0.0; - double one = 1.0; - double none = -1.0; - int inc_diag; /* inc_diag = m + 1 */ - int nzp = 0; /* number of zero pivots */ - - xlusup_first = xlusup[first]; - xlsub_first = xlsub[first]; - m = xlusup[first + 1] - xlusup_first; - n = last - first + 1; - m1 = m - 1; - inc_diag = m + 1; - nzlc = lastc ? (xlusup[last + 2] - xlusup[last + 1]) : 0; - temp = dwork - n; - - /* Quick return if nothing to do. */ - if (m == 0 || m == n || drop_rule == NODROP) - { - *nnzLj += m * n; - return 0; - } - - /* basic dropping: ILU(tau) */ - for (i = n; i <= m1; ) - { - /* the average abs value of ith row */ - switch (nrm) - { - case ONE_NORM: - temp[i] = dasum_(&n, &lusup[xlusup_first + i], &m) / (double)n; - break; - case TWO_NORM: - temp[i] = dnrm2_(&n, &lusup[xlusup_first + i], &m) - / sqrt((double)n); - break; - case INF_NORM: - default: - k = idamax_(&n, &lusup[xlusup_first + i], &m) - 1; - temp[i] = fabs(lusup[xlusup_first + i + m * k]); - break; - } - - /* drop small entries due to drop_tol */ - if (drop_rule & DROP_BASIC && temp[i] < drop_tol) - { - r++; - /* drop the current row and move the last undropped row here */ - if (r > 1) /* add to last row */ - { - /* accumulate the sum (for MILU) */ - switch (milu) - { - case SMILU_1: - case SMILU_2: - daxpy_(&n, &one, &lusup[xlusup_first + i], &m, - &lusup[xlusup_first + m - 1], &m); - break; - case SMILU_3: - for (j = 0; j < n; j++) - lusup[xlusup_first + (m - 1) + j * m] += - fabs(lusup[xlusup_first + i + j * m]); - break; - case SILU: - default: - break; - } - dcopy_(&n, &lusup[xlusup_first + m1], &m, - &lusup[xlusup_first + i], &m); - } /* if (r > 1) */ - else /* move to last row */ - { - dswap_(&n, &lusup[xlusup_first + m1], &m, - &lusup[xlusup_first + i], &m); - if (milu == SMILU_3) - for (j = 0; j < n; j++) { - lusup[xlusup_first + m1 + j * m] = - fabs(lusup[xlusup_first + m1 + j * m]); - } - } - lsub[xlsub_first + i] = lsub[xlsub_first + m1]; - m1--; - continue; - } /* if dropping */ - else - { - if (temp[i] > d_max) d_max = temp[i]; - if (temp[i] < d_min) d_min = temp[i]; - } - i++; - } /* for */ - - /* Secondary dropping: drop more rows according to the quota. */ - quota = ceil((double)quota / (double)n); - if (drop_rule & DROP_SECONDARY && m - r > quota) - { - register double tol = d_max; - - /* Calculate the second dropping tolerance */ - if (quota > n) - { - if (drop_rule & DROP_INTERP) /* by interpolation */ - { - d_max = 1.0 / d_max; d_min = 1.0 / d_min; - tol = 1.0 / (d_max + (d_min - d_max) * quota / (m - n - r)); - } - else /* by quick sort */ - { - register int *itemp = iwork - n; - A = temp; - for (i = n; i <= m1; i++) itemp[i] = i; - qsort(iwork, m1 - n + 1, sizeof(int), _compare_); - printf("new quota = %d, iwork size for initilization = %d\n", - quota, m1-n+1); - /* tol = temp[iwork[quota]]; bug! */ - assert( n <= quota <= m1 ); - tol = temp[itemp[quota]]; - } - } - - for (i = n; i <= m1; ) - { - if (temp[i] <= tol) - { - register int j; - r++; - /* drop the current row and move the last undropped row here */ - if (r > 1) /* add to last row */ - { - /* accumulate the sum (for MILU) */ - switch (milu) - { - case SMILU_1: - case SMILU_2: - daxpy_(&n, &one, &lusup[xlusup_first + i], &m, - &lusup[xlusup_first + m - 1], &m); - break; - case SMILU_3: - for (j = 0; j < n; j++) - lusup[xlusup_first + (m - 1) + j * m] += - fabs(lusup[xlusup_first + i + j * m]); - break; - case SILU: - default: - break; - } - dcopy_(&n, &lusup[xlusup_first + m1], &m, - &lusup[xlusup_first + i], &m); - } /* if (r > 1) */ - else /* move to last row */ - { - dswap_(&n, &lusup[xlusup_first + m1], &m, - &lusup[xlusup_first + i], &m); - if (milu == SMILU_3) - for (j = 0; j < n; j++) { - lusup[xlusup_first + m1 + j * m] = - fabs(lusup[xlusup_first + m1 + j * m]); - } - } - lsub[xlsub_first + i] = lsub[xlsub_first + m1]; - m1--; - temp[i] = temp[m1]; - - continue; - } - i++; - - } /* for */ - - } /* if secondary dropping */ - - for (i = n; i < m; i++) temp[i] = 0.0; - - if (r == 0) - { - *nnzLj += m * n; - return 0; - } - - /* add dropped entries to the diagnal */ - if (milu != SILU) - { - register int j; - double t; - for (j = 0; j < n; j++) - { - t = lusup[xlusup_first + (m - 1) + j * m] * MILU_ALPHA; - switch (milu) - { - case SMILU_1: - if (t != none) { - lusup[xlusup_first + j * inc_diag] *= (one + t); - } - else - { - lusup[xlusup_first + j * inc_diag] *= *fill_tol; -#ifdef DEBUG - printf("[1] ZERO PIVOT: FILL col %d.\n", first + j); - fflush(stdout); -#endif - nzp++; - } - break; - case SMILU_2: - lusup[xlusup_first + j * inc_diag] *= (1.0 + fabs(t)); - break; - case SMILU_3: - lusup[xlusup_first + j * inc_diag] *= (one + t); - break; - case SILU: - default: - break; - } - } - if (nzp > 0) *fill_tol = -nzp; - } - - /* Remove dropped entries from the memory and fix the pointers. */ - m1 = m - r; - for (j = 1; j < n; j++) - { - register int tmp1, tmp2; - tmp1 = xlusup_first + j * m1; - tmp2 = xlusup_first + j * m; - for (i = 0; i < m1; i++) - lusup[i + tmp1] = lusup[i + tmp2]; - } - for (i = 0; i < nzlc; i++) - lusup[xlusup_first + i + n * m1] = lusup[xlusup_first + i + n * m]; - for (i = 0; i < nzlc; i++) - lsub[xlsub[last + 1] - r + i] = lsub[xlsub[last + 1] + i]; - for (i = first + 1; i <= last + 1; i++) - { - xlusup[i] -= r * (i - first); - xlsub[i] -= r; - } - if (lastc) - { - xlusup[last + 2] -= r * n; - xlsub[last + 2] -= r; - } - - *nnzLj += (m - r) * n; - return r; -} diff --git a/thirdparty/superlu/SuperLU_4.1/SRC/ilu_scopy_to_ucol.c.bak b/thirdparty/superlu/SuperLU_4.1/SRC/ilu_scopy_to_ucol.c.bak deleted file mode 100644 index 2b3bc70..0000000 --- a/thirdparty/superlu/SuperLU_4.1/SRC/ilu_scopy_to_ucol.c.bak +++ /dev/null @@ -1,199 +0,0 @@ - -/*! @file ilu_scopy_to_ucol.c - * \brief Copy a computed column of U to the compressed data structure - * and drop some small entries - * - * <pre> - * -- SuperLU routine (version 4.0) -- - * Lawrence Berkeley National Laboratory - * June 30, 2009 - * </pre> - */ - -#include "slu_sdefs.h" - -#ifdef DEBUG -int num_drop_U; -#endif - -static float *A; /* used in _compare_ only */ -static int _compare_(const void *a, const void *b) -{ - register int *x = (int *)a, *y = (int *)b; - register double xx = fabs(A[*x]), yy = fabs(A[*y]); - if (xx > yy) return -1; - else if (xx < yy) return 1; - else return 0; -} - - -int -ilu_scopy_to_ucol( - int jcol, /* in */ - int nseg, /* in */ - int *segrep, /* in */ - int *repfnz, /* in */ - int *perm_r, /* in */ - float *dense, /* modified - reset to zero on return */ - int drop_rule,/* in */ - milu_t milu, /* in */ - double drop_tol, /* in */ - int quota, /* maximum nonzero entries allowed */ - float *sum, /* out - the sum of dropped entries */ - int *nnzUj, /* in - out */ - GlobalLU_t *Glu, /* modified */ - int *work /* working space with minimum size n, - * used by the second dropping rule */ - ) -{ -/* - * Gather from SPA dense[*] to global ucol[*]. - */ - int ksub, krep, ksupno; - int i, k, kfnz, segsze; - int fsupc, isub, irow; - int jsupno, nextu; - int new_next, mem_error; - int *xsup, *supno; - int *lsub, *xlsub; - float *ucol; - int *usub, *xusub; - int nzumax; - int m; /* number of entries in the nonzero U-segments */ - register float d_max = 0.0, d_min = 1.0 / dlamch_("Safe minimum"); - register double tmp; - float zero = 0.0; - - xsup = Glu->xsup; - supno = Glu->supno; - lsub = Glu->lsub; - xlsub = Glu->xlsub; - ucol = Glu->ucol; - usub = Glu->usub; - xusub = Glu->xusub; - nzumax = Glu->nzumax; - - *sum = zero; - if (drop_rule == NODROP) { - drop_tol = -1.0, quota = Glu->n; - } - - jsupno = supno[jcol]; - nextu = xusub[jcol]; - k = nseg - 1; - for (ksub = 0; ksub < nseg; ksub++) { - krep = segrep[k--]; - ksupno = supno[krep]; - - if ( ksupno != jsupno ) { /* Should go into ucol[] */ - kfnz = repfnz[krep]; - if ( kfnz != EMPTY ) { /* Nonzero U-segment */ - - fsupc = xsup[ksupno]; - isub = xlsub[fsupc] + kfnz - fsupc; - segsze = krep - kfnz + 1; - - new_next = nextu + segsze; - while ( new_next > nzumax ) { - if ((mem_error = sLUMemXpand(jcol, nextu, UCOL, &nzumax, - Glu)) != 0) - return (mem_error); - ucol = Glu->ucol; - if ((mem_error = sLUMemXpand(jcol, nextu, USUB, &nzumax, - Glu)) != 0) - return (mem_error); - usub = Glu->usub; - lsub = Glu->lsub; - } - - for (i = 0; i < segsze; i++) { - irow = lsub[isub++]; - tmp = fabs(dense[irow]); - - /* first dropping rule */ - if (quota > 0 && tmp >= drop_tol) { - if (tmp > d_max) d_max = tmp; - if (tmp < d_min) d_min = tmp; - usub[nextu] = perm_r[irow]; - ucol[nextu] = dense[irow]; - nextu++; - } else { - switch (milu) { - case SMILU_1: - case SMILU_2: - *sum += dense[irow]; - break; - case SMILU_3: - /* *sum += fabs(dense[irow]);*/ - *sum += tmp; - break; - case SILU: - default: - break; - } -#ifdef DEBUG - num_drop_U++; -#endif - } - dense[irow] = zero; - } - - } - - } - - } /* for each segment... */ - - xusub[jcol + 1] = nextu; /* Close U[*,jcol] */ - m = xusub[jcol + 1] - xusub[jcol]; - - /* second dropping rule */ - if (drop_rule & DROP_SECONDARY && m > quota) { - register double tol = d_max; - register int m0 = xusub[jcol] + m - 1; - - if (quota > 0) { - if (drop_rule & DROP_INTERP) { - d_max = 1.0 / d_max; d_min = 1.0 / d_min; - tol = 1.0 / (d_max + (d_min - d_max) * quota / m); - } else { - A = &ucol[xusub[jcol]]; - for (i = 0; i < m; i++) work[i] = i; - qsort(work, m, sizeof(int), _compare_); - tol = fabs(usub[xusub[jcol] + work[quota]]); - } - } - for (i = xusub[jcol]; i <= m0; ) { - if (fabs(ucol[i]) <= tol) { - switch (milu) { - case SMILU_1: - case SMILU_2: - *sum += ucol[i]; - break; - case SMILU_3: - *sum += fabs(ucol[i]); - break; - case SILU: - default: - break; - } - ucol[i] = ucol[m0]; - usub[i] = usub[m0]; - m0--; - m--; -#ifdef DEBUG - num_drop_U++; -#endif - xusub[jcol + 1]--; - continue; - } - i++; - } - } - - if (milu == SMILU_2) *sum = fabs(*sum); - - *nnzUj += m; - - return 0; -} diff --git a/thirdparty/superlu/SuperLU_4.1/SRC/ilu_zcopy_to_ucol.c.bak b/thirdparty/superlu/SuperLU_4.1/SRC/ilu_zcopy_to_ucol.c.bak deleted file mode 100644 index 9859c2f..0000000 --- a/thirdparty/superlu/SuperLU_4.1/SRC/ilu_zcopy_to_ucol.c.bak +++ /dev/null @@ -1,202 +0,0 @@ - -/*! @file ilu_zcopy_to_ucol.c - * \brief Copy a computed column of U to the compressed data structure - * and drop some small entries - * - * <pre> - * -- SuperLU routine (version 4.0) -- - * Lawrence Berkeley National Laboratory - * June 30, 2009 - * </pre> - */ - -#include "slu_zdefs.h" - -#ifdef DEBUG -int num_drop_U; -#endif - -static doublecomplex *A; /* used in _compare_ only */ -static int _compare_(const void *a, const void *b) -{ - register int *x = (int *)a, *y = (int *)b; - register double xx = z_abs1(&A[*x]), yy = z_abs1(&A[*y]); - if (xx > yy) return -1; - else if (xx < yy) return 1; - else return 0; -} - - -int -ilu_zcopy_to_ucol( - int jcol, /* in */ - int nseg, /* in */ - int *segrep, /* in */ - int *repfnz, /* in */ - int *perm_r, /* in */ - doublecomplex *dense, /* modified - reset to zero on return */ - int drop_rule,/* in */ - milu_t milu, /* in */ - double drop_tol, /* in */ - int quota, /* maximum nonzero entries allowed */ - doublecomplex *sum, /* out - the sum of dropped entries */ - int *nnzUj, /* in - out */ - GlobalLU_t *Glu, /* modified */ - int *work /* working space with minimum size n, - * used by the second dropping rule */ - ) -{ -/* - * Gather from SPA dense[*] to global ucol[*]. - */ - int ksub, krep, ksupno; - int i, k, kfnz, segsze; - int fsupc, isub, irow; - int jsupno, nextu; - int new_next, mem_error; - int *xsup, *supno; - int *lsub, *xlsub; - doublecomplex *ucol; - int *usub, *xusub; - int nzumax; - int m; /* number of entries in the nonzero U-segments */ - register double d_max = 0.0, d_min = 1.0 / dlamch_("Safe minimum"); - register double tmp; - doublecomplex zero = {0.0, 0.0}; - - xsup = Glu->xsup; - supno = Glu->supno; - lsub = Glu->lsub; - xlsub = Glu->xlsub; - ucol = Glu->ucol; - usub = Glu->usub; - xusub = Glu->xusub; - nzumax = Glu->nzumax; - - *sum = zero; - if (drop_rule == NODROP) { - drop_tol = -1.0, quota = Glu->n; - } - - jsupno = supno[jcol]; - nextu = xusub[jcol]; - k = nseg - 1; - for (ksub = 0; ksub < nseg; ksub++) { - krep = segrep[k--]; - ksupno = supno[krep]; - - if ( ksupno != jsupno ) { /* Should go into ucol[] */ - kfnz = repfnz[krep]; - if ( kfnz != EMPTY ) { /* Nonzero U-segment */ - - fsupc = xsup[ksupno]; - isub = xlsub[fsupc] + kfnz - fsupc; - segsze = krep - kfnz + 1; - - new_next = nextu + segsze; - while ( new_next > nzumax ) { - if ((mem_error = zLUMemXpand(jcol, nextu, UCOL, &nzumax, - Glu)) != 0) - return (mem_error); - ucol = Glu->ucol; - if ((mem_error = zLUMemXpand(jcol, nextu, USUB, &nzumax, - Glu)) != 0) - return (mem_error); - usub = Glu->usub; - lsub = Glu->lsub; - } - - for (i = 0; i < segsze; i++) { - irow = lsub[isub++]; - tmp = z_abs1(&dense[irow]); - - /* first dropping rule */ - if (quota > 0 && tmp >= drop_tol) { - if (tmp > d_max) d_max = tmp; - if (tmp < d_min) d_min = tmp; - usub[nextu] = perm_r[irow]; - ucol[nextu] = dense[irow]; - nextu++; - } else { - switch (milu) { - case SMILU_1: - case SMILU_2: - z_add(sum, sum, &dense[irow]); - break; - case SMILU_3: - /* *sum += fabs(dense[irow]);*/ - sum->r += tmp; - break; - case SILU: - default: - break; - } -#ifdef DEBUG - num_drop_U++; -#endif - } - dense[irow] = zero; - } - - } - - } - - } /* for each segment... */ - - xusub[jcol + 1] = nextu; /* Close U[*,jcol] */ - m = xusub[jcol + 1] - xusub[jcol]; - - /* second dropping rule */ - if (drop_rule & DROP_SECONDARY && m > quota) { - register double tol = d_max; - register int m0 = xusub[jcol] + m - 1; - - if (quota > 0) { - if (drop_rule & DROP_INTERP) { - d_max = 1.0 / d_max; d_min = 1.0 / d_min; - tol = 1.0 / (d_max + (d_min - d_max) * quota / m); - } else { - A = &ucol[xusub[jcol]]; - for (i = 0; i < m; i++) work[i] = i; - qsort(work, m, sizeof(int), _compare_); - tol = fabs(usub[xusub[jcol] + work[quota]]); - } - } - for (i = xusub[jcol]; i <= m0; ) { - if (z_abs1(&ucol[i]) <= tol) { - switch (milu) { - case SMILU_1: - case SMILU_2: - z_add(sum, sum, &ucol[i]); - break; - case SMILU_3: - sum->r += tmp; - break; - case SILU: - default: - break; - } - ucol[i] = ucol[m0]; - usub[i] = usub[m0]; - m0--; - m--; -#ifdef DEBUG - num_drop_U++; -#endif - xusub[jcol + 1]--; - continue; - } - i++; - } - } - - if (milu == SMILU_2) { - sum->r = z_abs1(sum); sum->i = 0.0; - } - if (milu == SMILU_3) sum->i = 0.0; - - *nnzUj += m; - - return 0; -} diff --git a/thirdparty/superlu/SuperLU_4.1/SRC/mc64ad.f.bak b/thirdparty/superlu/SuperLU_4.1/SRC/mc64ad.f.bak deleted file mode 100644 index c68c4d7..0000000 --- a/thirdparty/superlu/SuperLU_4.1/SRC/mc64ad.f.bak +++ /dev/null @@ -1,2002 +0,0 @@ -CCCCC COPYRIGHT (c) 1999 Council for the Central Laboratory of the -CCCCC Research Councils. All rights reserved. -CCCCC PACKAGE MC64A/AD -CCCCC AUTHORS Iain Duff (i.duff@rl.ac.uk) and Jacko Koster (jak@ii.uib.no) -CCCCC LAST UPDATE 20/09/99 -CCCCC -C *** Conditions on external use *** -C -C The user shall acknowledge the contribution of this -C package in any publication of material dependent upon the use of -C the package. The user shall use reasonable endeavours to notify -C the authors of the package of this publication. -C -C The user can modify this code but, at no time -C shall the right or title to all or any part of this package pass -C to the user. The user shall make available free of charge -C to the authors for any purpose all information relating to any -C alteration or addition made to this package for the purposes of -C extending the capabilities or enhancing the performance of this -C package. -C -C The user shall not pass this code directly to a third party without the -C express prior consent of the authors. Users wanting to licence their -C own copy of these routines should send email to hsl@aeat.co.uk -C -C None of the comments from the Copyright notice up to and including this -C one shall be removed or altered in any way. - -C********************************************************************** - SUBROUTINE MC64ID(ICNTL) - IMPLICIT NONE -C -C *** Copyright (c) 1999 Council for the Central Laboratory of the -C Research Councils *** -C *** Although every effort has been made to ensure robustness and *** -C *** reliability of the subroutines in this MC64 suite, we *** -C *** disclaim any liability arising through the use or misuse of *** -C *** any of the subroutines. *** -C *** Any problems? Contact ... -C Iain Duff (I.Duff@rl.ac.uk) or Jacko Koster (jak@ii.uib.no) *** -C -C Purpose -C ======= -C -C The components of the array ICNTL control the action of MC64A/AD. -C Default values for these are set in this subroutine. -C -C Parameters -C ========== -C - INTEGER ICNTL(10) -C -C Local variables - INTEGER I -C -C ICNTL(1) has default value 6. -C It is the output stream for error messages. If it -C is negative, these messages will be suppressed. -C -C ICNTL(2) has default value 6. -C It is the output stream for warning messages. -C If it is negative, these messages are suppressed. -C -C ICNTL(3) has default value -1. -C It is the output stream for monitoring printing. -C If it is negative, these messages are suppressed. -C -C ICNTL(4) has default value 0. -C If left at the defaut value, the incoming data is checked for -C out-of-range indices and duplicates. Setting ICNTL(4) to any -C other will avoid the checks but is likely to cause problems -C later if out-of-range indices or duplicates are present. -C The user should only set ICNTL(4) non-zero, if the data is -C known to avoid these problems. -C -C ICNTL(5) to ICNTL(10) are not used by MC64A/AD but are set to -C zero in this routine. - -C Initialization of the ICNTL array. - ICNTL(1) = 6 - ICNTL(2) = 6 - ICNTL(3) = -1 - DO 10 I = 4,10 - ICNTL(I) = 0 - 10 CONTINUE - - RETURN - END - -C********************************************************************** - SUBROUTINE MC64AD(JOB,N,NE,IP,IRN,A,NUM,CPERM,LIW,IW,LDW,DW, - & ICNTL,INFO) - IMPLICIT NONE -C -C *** Copyright (c) 1999 Council for the Central Laboratory of the -C Research Councils *** -C *** Although every effort has been made to ensure robustness and *** -C *** reliability of the subroutines in this MC64 suite, we *** -C *** disclaim any liability arising through the use or misuse of *** -C *** any of the subroutines. *** -C *** Any problems? Contact ... -C Iain Duff (I.Duff@rl.ac.uk) or Jacko Koster (jak@ii.uib.no) *** -C -C Purpose -C ======= -C -C This subroutine attempts to find a column permutation for an NxN -C sparse matrix A = {a_ij} that makes the permuted matrix have N -C entries on its diagonal. -C If the matrix is structurally nonsingular, the subroutine optionally -C returns a column permutation that maximizes the smallest element -C on the diagonal, maximizes the sum of the diagonal entries, or -C maximizes the product of the diagonal entries of the permuted matrix. -C For the latter option, the subroutine also finds scaling factors -C that may be used to scale the matrix so that the nonzero diagonal -C entries of the permuted matrix are one in absolute value and all the -C off-diagonal entries are less than or equal to one in absolute value. -C The natural logarithms of the scaling factors u(i), i=1..N, for the -C rows and v(j), j=1..N, for the columns are returned so that the -C scaled matrix B = {b_ij} has entries b_ij = a_ij * EXP(u_i + v_j). -C -C Parameters -C ========== -C - INTEGER JOB,N,NE,NUM,LIW,LDW - INTEGER IP(N+1),IRN(NE),CPERM(N),IW(LIW),ICNTL(10),INFO(10) - DOUBLE PRECISION A(NE),DW(LDW) -C -C JOB is an INTEGER variable which must be set by the user to -C control the action. It is not altered by the subroutine. -C Possible values for JOB are: -C 1 Compute a column permutation of the matrix so that the -C permuted matrix has as many entries on its diagonal as possible. -C The values on the diagonal are of arbitrary size. HSL subroutine -C MC21A/AD is used for this. See [1]. -C 2 Compute a column permutation of the matrix so that the smallest -C value on the diagonal of the permuted matrix is maximized. -C See [3]. -C 3 Compute a column permutation of the matrix so that the smallest -C value on the diagonal of the permuted matrix is maximized. -C The algorithm differs from the one used for JOB = 2 and may -C have quite a different performance. See [2]. -C 4 Compute a column permutation of the matrix so that the sum -C of the diagonal entries of the permuted matrix is maximized. -C See [3]. -C 5 Compute a column permutation of the matrix so that the product -C of the diagonal entries of the permuted matrix is maximized -C and vectors to scale the matrix so that the nonzero diagonal -C entries of the permuted matrix are one in absolute value and -C all the off-diagonal entries are less than or equal to one in -C absolute value. See [3]. -C Restriction: 1 <= JOB <= 5. -C -C N is an INTEGER variable which must be set by the user to the -C order of the matrix A. It is not altered by the subroutine. -C Restriction: N >= 1. -C -C NE is an INTEGER variable which must be set by the user to the -C number of entries in the matrix. It is not altered by the -C subroutine. -C Restriction: NE >= 1. -C -C IP is an INTEGER array of length N+1. -C IP(J), J=1..N, must be set by the user to the position in array IRN -C of the first row index of an entry in column J. IP(N+1) must be set -C to NE+1. It is not altered by the subroutine. -C -C IRN is an INTEGER array of length NE. -C IRN(K), K=1..NE, must be set by the user to hold the row indices of -C the entries of the matrix. Those belonging to column J must be -C stored contiguously in the positions IP(J)..IP(J+1)-1. The ordering -C of the row indices within each column is unimportant. Repeated -C entries are not allowed. The array IRN is not altered by the -C subroutine. -C -C A is a REAL (DOUBLE PRECISION in the D-version) array of length NE. -C The user must set A(K), K=1..NE, to the numerical value of the -C entry that corresponds to IRN(K). -C It is not used by the subroutine when JOB = 1. -C It is not altered by the subroutine. -C -C NUM is an INTEGER variable that need not be set by the user. -C On successful exit, NUM will be the number of entries on the -C diagonal of the permuted matrix. -C If NUM < N, the matrix is structurally singular. -C -C CPERM is an INTEGER array of length N that need not be set by the -C user. On successful exit, CPERM contains the column permutation. -C Column CPERM(J) of the original matrix is column J in the permuted -C matrix, J=1..N. -C -C LIW is an INTEGER variable that must be set by the user to -C the dimension of array IW. It is not altered by the subroutine. -C Restriction: -C JOB = 1 : LIW >= 5N -C JOB = 2 : LIW >= 4N -C JOB = 3 : LIW >= 10N + NE -C JOB = 4 : LIW >= 5N -C JOB = 5 : LIW >= 5N -C -C IW is an INTEGER array of length LIW that is used for workspace. -C -C LDW is an INTEGER variable that must be set by the user to the -C dimension of array DW. It is not altered by the subroutine. -C Restriction: -C JOB = 1 : LDW is not used -C JOB = 2 : LDW >= N -C JOB = 3 : LDW >= NE -C JOB = 4 : LDW >= 2N + NE -C JOB = 5 : LDW >= 3N + NE -C -C DW is a REAL (DOUBLE PRECISION in the D-version) array of length LDW -C that is used for workspace. If JOB = 5, on return, -C DW(i) contains u_i, i=1..N, and DW(N+j) contains v_j, j=1..N. -C -C ICNTL is an INTEGER array of length 10. Its components control the -C output of MC64A/AD and must be set by the user before calling -C MC64A/AD. They are not altered by the subroutine. -C -C ICNTL(1) must be set to specify the output stream for -C error messages. If ICNTL(1) < 0, messages are suppressed. -C The default value set by MC46I/ID is 6. -C -C ICNTL(2) must be set by the user to specify the output stream for -C warning messages. If ICNTL(2) < 0, messages are suppressed. -C The default value set by MC46I/ID is 6. -C -C ICNTL(3) must be set by the user to specify the output stream for -C diagnostic messages. If ICNTL(3) < 0, messages are suppressed. -C The default value set by MC46I/ID is -1. -C -C ICNTL(4) must be set by the user to a value other than 0 to avoid -C checking of the input data. -C The default value set by MC46I/ID is 0. -C -C INFO is an INTEGER array of length 10 which need not be set by the -C user. INFO(1) is set non-negative to indicate success. A negative -C value is returned if an error occurred, a positive value if a -C warning occurred. INFO(2) holds further information on the error. -C On exit from the subroutine, INFO(1) will take one of the -C following values: -C 0 : successful entry (for structurally nonsingular matrix). -C +1 : successful entry (for structurally singular matrix). -C +2 : the returned scaling factors are large and may cause -C overflow when used to scale the matrix. -C (For JOB = 5 entry only.) -C -1 : JOB < 1 or JOB > 5. Value of JOB held in INFO(2). -C -2 : N < 1. Value of N held in INFO(2). -C -3 : NE < 1. Value of NE held in INFO(2). -C -4 : the defined length LIW violates the restriction on LIW. -C Value of LIW required given by INFO(2). -C -5 : the defined length LDW violates the restriction on LDW. -C Value of LDW required given by INFO(2). -C -6 : entries are found whose row indices are out of range. INFO(2) -C contains the index of a column in which such an entry is found. -C -7 : repeated entries are found. INFO(2) contains the index of a -C column in which such entries are found. -C INFO(3) to INFO(10) are not currently used and are set to zero by -C the routine. -C -C References: -C [1] I. S. Duff, (1981), -C "Algorithm 575. Permutations for a zero-free diagonal", -C ACM Trans. Math. Software 7(3), 387-390. -C [2] I. S. Duff and J. Koster, (1998), -C "The design and use of algorithms for permuting large -C entries to the diagonal of sparse matrices", -C SIAM J. Matrix Anal. Appl., vol. 20, no. 4, pp. 889-901. -C [3] I. S. Duff and J. Koster, (1999), -C "On algorithms for permuting large entries to the diagonal -C of sparse matrices", -C Technical Report RAL-TR-1999-030, RAL, Oxfordshire, England. - -C Local variables and parameters - INTEGER I,J,K - DOUBLE PRECISION FACT,ZERO,RINF - PARAMETER (ZERO=0.0D+00) -C External routines and functions -c EXTERNAL FD05AD -c DOUBLE PRECISION FD05AD - EXTERNAL MC21AD,MC64BD,MC64RD,MC64SD,MC64WD, DLAMCH - DOUBLE PRECISION DLAMCH -C Intrinsic functions - INTRINSIC ABS,LOG - -C Set RINF to largest positive real number (infinity) -c XSL RINF = FD05AD(5) - RINF = DLAMCH('Overflow') - -C Check value of JOB - IF (JOB.LT.1 .OR. JOB.GT.5) THEN - INFO(1) = -1 - INFO(2) = JOB - IF (ICNTL(1).GE.0) WRITE(ICNTL(1),9001) INFO(1),'JOB',JOB - GO TO 99 - ENDIF -C Check value of N - IF (N.LT.1) THEN - INFO(1) = -2 - INFO(2) = N - IF (ICNTL(1).GE.0) WRITE(ICNTL(1),9001) INFO(1),'N',N - GO TO 99 - ENDIF -C Check value of NE - IF (NE.LT.1) THEN - INFO(1) = -3 - INFO(2) = NE - IF (ICNTL(1).GE.0) WRITE(ICNTL(1),9001) INFO(1),'NE',NE - GO TO 99 - ENDIF -C Check LIW - IF (JOB.EQ.1) K = 5*N - IF (JOB.EQ.2) K = 4*N - IF (JOB.EQ.3) K = 10*N + NE - IF (JOB.EQ.4) K = 5*N - IF (JOB.EQ.5) K = 5*N - IF (LIW.LT.K) THEN - INFO(1) = -4 - INFO(2) = K - IF (ICNTL(1).GE.0) WRITE(ICNTL(1),9004) INFO(1),K - GO TO 99 - ENDIF -C Check LDW -C If JOB = 1, do not check - IF (JOB.GT.1) THEN - IF (JOB.EQ.2) K = N - IF (JOB.EQ.3) K = NE - IF (JOB.EQ.4) K = 2*N + NE - IF (JOB.EQ.5) K = 3*N + NE - IF (LDW.LT.K) THEN - INFO(1) = -5 - INFO(2) = K - IF (ICNTL(1).GE.0) WRITE(ICNTL(1),9005) INFO(1),K - GO TO 99 - ENDIF - ENDIF - IF (ICNTL(4).EQ.0) THEN -C Check row indices. Use IW(1:N) as workspace - DO 3 I = 1,N - IW(I) = 0 - 3 CONTINUE - DO 6 J = 1,N - DO 4 K = IP(J),IP(J+1)-1 - I = IRN(K) -C Check for row indices that are out of range - IF (I.LT.1 .OR. I.GT.N) THEN - INFO(1) = -6 - INFO(2) = J - IF (ICNTL(1).GE.0) WRITE(ICNTL(1),9006) INFO(1),J,I - GO TO 99 - ENDIF -C Check for repeated row indices within a column - IF (IW(I).EQ.J) THEN - INFO(1) = -7 - INFO(2) = J - IF (ICNTL(1).GE.0) WRITE(ICNTL(1),9007) INFO(1),J,I - GO TO 99 - ELSE - IW(I) = J - ENDIF - 4 CONTINUE - 6 CONTINUE - ENDIF - -C Print diagnostics on input - IF (ICNTL(3).GE.0) THEN - WRITE(ICNTL(3),9020) JOB,N,NE - WRITE(ICNTL(3),9021) (IP(J),J=1,N+1) - WRITE(ICNTL(3),9022) (IRN(J),J=1,NE) - IF (JOB.GT.1) WRITE(ICNTL(3),9023) (A(J),J=1,NE) - ENDIF - -C Set components of INFO to zero - DO 8 I=1,10 - INFO(I) = 0 - 8 CONTINUE - -C Compute maximum matching with MC21A/AD - IF (JOB.EQ.1) THEN -C Put length of column J in IW(J) - DO 10 J = 1,N - IW(J) = IP(J+1) - IP(J) - 10 CONTINUE -C IW(N+1:5N) is workspace - CALL MC21AD(N,IRN,NE,IP,IW(1),CPERM,NUM,IW(N+1)) - GO TO 90 - ENDIF - -C Compute bottleneck matching - IF (JOB.EQ.2) THEN -C IW(1:5N), DW(1:N) are workspaces - CALL MC64BD(N,NE,IP,IRN,A,CPERM,NUM, - & IW(1),IW(N+1),IW(2*N+1),IW(3*N+1),DW) - GO TO 90 - ENDIF - -C Compute bottleneck matching - IF (JOB.EQ.3) THEN -C Copy IRN(K) into IW(K), ABS(A(K)) into DW(K), K=1..NE - DO 20 K = 1,NE - IW(K) = IRN(K) - DW(K) = ABS(A(K)) - 20 CONTINUE -C Sort entries in each column by decreasing value. - CALL MC64RD(N,NE,IP,IW,DW) -C IW(NE+1:NE+10N) is workspace - CALL MC64SD(N,NE,IP,IW(1),DW,CPERM,NUM,IW(NE+1), - & IW(NE+N+1),IW(NE+2*N+1),IW(NE+3*N+1),IW(NE+4*N+1), - & IW(NE+5*N+1),IW(NE+6*N+1)) - GO TO 90 - ENDIF - - IF (JOB.EQ.4) THEN - DO 50 J = 1,N - FACT = ZERO - DO 30 K = IP(J),IP(J+1)-1 - IF (ABS(A(K)).GT.FACT) FACT = ABS(A(K)) - 30 CONTINUE - DO 40 K = IP(J),IP(J+1)-1 - DW(2*N+K) = FACT - ABS(A(K)) - 40 CONTINUE - 50 CONTINUE -C B = DW(2N+1:2N+NE); IW(1:5N) and DW(1:2N) are workspaces - CALL MC64WD(N,NE,IP,IRN,DW(2*N+1),CPERM,NUM, - & IW(1),IW(N+1),IW(2*N+1),IW(3*N+1),IW(4*N+1), - & DW(1),DW(N+1)) - GO TO 90 - ENDIF - - IF (JOB.EQ.5) THEN - DO 75 J = 1,N - FACT = ZERO - DO 60 K = IP(J),IP(J+1)-1 - DW(3*N+K) = ABS(A(K)) - IF (DW(3*N+K).GT.FACT) FACT = DW(3*N+K) - 60 CONTINUE - DW(2*N+J) = FACT - IF (FACT.NE.ZERO) THEN - FACT = LOG(FACT) - ELSE - FACT = RINF/N - ENDIF - DO 70 K = IP(J),IP(J+1)-1 - IF (DW(3*N+K).NE.ZERO) THEN - DW(3*N+K) = FACT - LOG(DW(3*N+K)) - ELSE - DW(3*N+K) = RINF/N - ENDIF - 70 CONTINUE - 75 CONTINUE -C B = DW(3N+1:3N+NE); IW(1:5N) and DW(1:2N) are workspaces - CALL MC64WD(N,NE,IP,IRN,DW(3*N+1),CPERM,NUM, - & IW(1),IW(N+1),IW(2*N+1),IW(3*N+1),IW(4*N+1), - & DW(1),DW(N+1)) - IF (NUM.EQ.N) THEN - DO 80 J = 1,N - IF (DW(2*N+J).NE.ZERO) THEN - DW(N+J) = DW(N+J) - LOG(DW(2*N+J)) - ELSE - DW(N+J) = ZERO - ENDIF - 80 CONTINUE - ENDIF -C Check size of scaling factors - FACT = 0.5*LOG(RINF) - DO 86 J = 1,N - IF (DW(J).LT.FACT .AND. DW(N+J).LT.FACT) GO TO 86 - INFO(1) = 2 - GO TO 90 - 86 CONTINUE -C GO TO 90 - ENDIF - - 90 IF (INFO(1).EQ.0 .AND. NUM.LT.N) THEN -C Matrix is structurally singular, return with warning - INFO(1) = 1 - IF (ICNTL(2).GE.0) WRITE(ICNTL(2),9011) INFO(1) - ENDIF - IF (INFO(1).EQ.2) THEN -C Scaling factors are large, return with warning - IF (ICNTL(2).GE.0) WRITE(ICNTL(2),9012) INFO(1) - ENDIF - -C Print diagnostics on output - IF (ICNTL(3).GE.0) THEN - WRITE(ICNTL(3),9030) (INFO(J),J=1,2) - WRITE(ICNTL(3),9031) NUM - WRITE(ICNTL(3),9032) (CPERM(J),J=1,N) - IF (JOB.EQ.5) THEN - WRITE(ICNTL(3),9033) (DW(J),J=1,N) - WRITE(ICNTL(3),9034) (DW(N+J),J=1,N) - ENDIF - ENDIF - -C Return from subroutine. - 99 RETURN - - 9001 FORMAT (' ****** Error in MC64A/AD. INFO(1) = ',I2, - & ' because ',(A),' = ',I10) - 9004 FORMAT (' ****** Error in MC64A/AD. INFO(1) = ',I2/ - & ' LIW too small, must be at least ',I8) - 9005 FORMAT (' ****** Error in MC64A/AD. INFO(1) = ',I2/ - & ' LDW too small, must be at least ',I8) - 9006 FORMAT (' ****** Error in MC64A/AD. INFO(1) = ',I2/ - & ' Column ',I8, - & ' contains an entry with invalid row index ',I8) - 9007 FORMAT (' ****** Error in MC64A/AD. INFO(1) = ',I2/ - & ' Column ',I8, - & ' contains two or more entries with row index ',I8) - 9011 FORMAT (' ****** Warning from MC64A/AD. INFO(1) = ',I2/ - & ' The matrix is structurally singular.') - 9012 FORMAT (' ****** Warning from MC64A/AD. INFO(1) = ',I2/ - & ' Some scaling factors may be too large.') - 9020 FORMAT (' ****** Input parameters for MC64A/AD:'/ - & ' JOB = ',I8/' N = ',I8/' NE = ',I8) - 9021 FORMAT (' IP(1:N+1) = ',8I8/(14X,8I8)) - 9022 FORMAT (' IRN(1:NE) = ',8I8/(14X,8I8)) - 9023 FORMAT (' A(1:NE) = ',4(1PD14.4)/(14X,4(1PD14.4))) - 9030 FORMAT (' ****** Output parameters for MC64A/AD:'/ - & ' INFO(1:2) = ',2I8) - 9031 FORMAT (' NUM = ',I8) - 9032 FORMAT (' CPERM(1:N) = ',8I8/(14X,8I8)) - 9033 FORMAT (' DW(1:N) = ',5(F11.3)/(14X,5(F11.3))) - 9034 FORMAT (' DW(N+1:2N) = ',5(F11.3)/(14X,5(F11.3))) - END - -C********************************************************************** - SUBROUTINE MC64BD(N,NE,IP,IRN,A,IPERM,NUM,JPERM,PR,Q,L,D) - IMPLICIT NONE -C -C *** Copyright (c) 1999 Council for the Central Laboratory of the -C Research Councils *** -C *** Although every effort has been made to ensure robustness and *** -C *** reliability of the subroutines in this MC64 suite, we *** -C *** disclaim any liability arising through the use or misuse of *** -C *** any of the subroutines. *** -C *** Any problems? Contact ... -C Iain Duff (I.Duff@rl.ac.uk) or Jacko Koster (jak@ii.uib.no) *** -C - INTEGER N,NE,NUM - INTEGER IP(N+1),IRN(NE),IPERM(N),JPERM(N),PR(N),Q(N),L(N) - DOUBLE PRECISION A(NE),D(N) - -C N, NE, IP, IRN are described in MC64A/AD. -C A is a REAL (DOUBLE PRECISION in the D-version) array of length -C NE. A(K), K=1..NE, must be set to the value of the entry -C that corresponds to IRN(K). It is not altered. -C IPERM is an INTEGER array of length N. On exit, it contains the -C matching: IPERM(I) = 0 or row I is matched to column IPERM(I). -C NUM is INTEGER variable. On exit, it contains the cardinality of the -C matching stored in IPERM. -C IW is an INTEGER work array of length 4N. -C DW is a REAL (DOUBLE PRECISION in D-version) work array of length N. - -C Local variables - INTEGER I,II,J,JJ,JORD,Q0,QLEN,IDUM,JDUM,ISP,JSP, - & K,KK,KK1,KK2,I0,UP,LOW - DOUBLE PRECISION CSP,DI,DNEW,DQ0,AI,A0,BV -C Local parameters - DOUBLE PRECISION RINF,ZERO,MINONE - PARAMETER (ZERO=0.0D+0,MINONE=-1.0D+0) -C Intrinsic functions - INTRINSIC ABS,MIN -C External subroutines and/or functions -c EXTERNAL FD05AD,MC64DD,MC64ED,MC64FD, DLAMCH -c DOUBLE PRECISION FD05AD, DLAMCH - EXTERNAL MC64DD,MC64ED,MC64FD, DLAMCH - DOUBLE PRECISION DLAMCH - -C Set RINF to largest positive real number -c XSL RINF = FD05AD(5) - RINF = DLAMCH('Overflow') - -C Initialization - NUM = 0 - BV = RINF - DO 10 K = 1,N - IPERM(K) = 0 - JPERM(K) = 0 - PR(K) = IP(K) - D(K) = ZERO - 10 CONTINUE -C Scan columns of matrix; - DO 20 J = 1,N - A0 = MINONE - DO 30 K = IP(J),IP(J+1)-1 - I = IRN(K) - AI = ABS(A(K)) - IF (AI.GT.D(I)) D(I) = AI - IF (JPERM(J).NE.0) GO TO 30 - IF (AI.GE.BV) THEN - A0 = BV - IF (IPERM(I).NE.0) GO TO 30 - JPERM(J) = I - IPERM(I) = J - NUM = NUM + 1 - ELSE - IF (AI.LE.A0) GO TO 30 - A0 = AI - I0 = I - ENDIF - 30 CONTINUE - IF (A0.NE.MINONE .AND. A0.LT.BV) THEN - BV = A0 - IF (IPERM(I0).NE.0) GO TO 20 - IPERM(I0) = J - JPERM(J) = I0 - NUM = NUM + 1 - ENDIF - 20 CONTINUE -C Update BV with smallest of all the largest maximum absolute values -C of the rows. - DO 25 I = 1,N - BV = MIN(BV,D(I)) - 25 CONTINUE - IF (NUM.EQ.N) GO TO 1000 -C Rescan unassigned columns; improve initial assignment - DO 95 J = 1,N - IF (JPERM(J).NE.0) GO TO 95 - DO 50 K = IP(J),IP(J+1)-1 - I = IRN(K) - AI = ABS(A(K)) - IF (AI.LT.BV) GO TO 50 - IF (IPERM(I).EQ.0) GO TO 90 - JJ = IPERM(I) - KK1 = PR(JJ) - KK2 = IP(JJ+1) - 1 - IF (KK1.GT.KK2) GO TO 50 - DO 70 KK = KK1,KK2 - II = IRN(KK) - IF (IPERM(II).NE.0) GO TO 70 - IF (ABS(A(KK)).GE.BV) GO TO 80 - 70 CONTINUE - PR(JJ) = KK2 + 1 - 50 CONTINUE - GO TO 95 - 80 JPERM(JJ) = II - IPERM(II) = JJ - PR(JJ) = KK + 1 - 90 NUM = NUM + 1 - JPERM(J) = I - IPERM(I) = J - PR(J) = K + 1 - 95 CONTINUE - IF (NUM.EQ.N) GO TO 1000 - -C Prepare for main loop - DO 99 I = 1,N - D(I) = MINONE - L(I) = 0 - 99 CONTINUE - -C Main loop ... each pass round this loop is similar to Dijkstra's -C algorithm for solving the single source shortest path problem - - DO 100 JORD = 1,N - - IF (JPERM(JORD).NE.0) GO TO 100 - QLEN = 0 - LOW = N + 1 - UP = N + 1 -C CSP is cost of shortest path to any unassigned row -C ISP is matrix position of unassigned row element in shortest path -C JSP is column index of unassigned row element in shortest path - CSP = MINONE -C Build shortest path tree starting from unassigned column JORD - J = JORD - PR(J) = -1 - -C Scan column J - DO 115 K = IP(J),IP(J+1)-1 - I = IRN(K) - DNEW = ABS(A(K)) - IF (CSP.GE.DNEW) GO TO 115 - IF (IPERM(I).EQ.0) THEN -C Row I is unassigned; update shortest path info - CSP = DNEW - ISP = I - JSP = J - IF (CSP.GE.BV) GO TO 160 - ELSE - D(I) = DNEW - IF (DNEW.GE.BV) THEN -C Add row I to Q2 - LOW = LOW - 1 - Q(LOW) = I - ELSE -C Add row I to Q, and push it - QLEN = QLEN + 1 - L(I) = QLEN - CALL MC64DD(I,N,Q,D,L,1) - ENDIF - JJ = IPERM(I) - PR(JJ) = J - ENDIF - 115 CONTINUE - - DO 150 JDUM = 1,NUM -C If Q2 is empty, extract new rows from Q - IF (LOW.EQ.UP) THEN - IF (QLEN.EQ.0) GO TO 160 - I = Q(1) - IF (CSP.GE.D(I)) GO TO 160 - BV = D(I) - DO 152 IDUM = 1,N - CALL MC64ED(QLEN,N,Q,D,L,1) - L(I) = 0 - LOW = LOW - 1 - Q(LOW) = I - IF (QLEN.EQ.0) GO TO 153 - I = Q(1) - IF (D(I).NE.BV) GO TO 153 - 152 CONTINUE -C End of dummy loop; this point is never reached - ENDIF -C Move row Q0 - 153 UP = UP - 1 - Q0 = Q(UP) - DQ0 = D(Q0) - L(Q0) = UP -C Scan column that matches with row Q0 - J = IPERM(Q0) - DO 155 K = IP(J),IP(J+1)-1 - I = IRN(K) -C Update D(I) - IF (L(I).GE.UP) GO TO 155 - DNEW = MIN(DQ0,ABS(A(K))) - IF (CSP.GE.DNEW) GO TO 155 - IF (IPERM(I).EQ.0) THEN -C Row I is unassigned; update shortest path info - CSP = DNEW - ISP = I - JSP = J - IF (CSP.GE.BV) GO TO 160 - ELSE - DI = D(I) - IF (DI.GE.BV .OR. DI.GE.DNEW) GO TO 155 - D(I) = DNEW - IF (DNEW.GE.BV) THEN -C Delete row I from Q (if necessary); add row I to Q2 - IF (DI.NE.MINONE) - * CALL MC64FD(L(I),QLEN,N,Q,D,L,1) - L(I) = 0 - LOW = LOW - 1 - Q(LOW) = I - ELSE -C Add row I to Q (if necessary); push row I up Q - IF (DI.EQ.MINONE) THEN - QLEN = QLEN + 1 - L(I) = QLEN - ENDIF - CALL MC64DD(I,N,Q,D,L,1) - ENDIF -C Update tree - JJ = IPERM(I) - PR(JJ) = J - ENDIF - 155 CONTINUE - 150 CONTINUE - -C If CSP = MINONE, no augmenting path is found - 160 IF (CSP.EQ.MINONE) GO TO 190 -C Update bottleneck value - BV = MIN(BV,CSP) -C Find augmenting path by tracing backward in PR; update IPERM,JPERM - NUM = NUM + 1 - I = ISP - J = JSP - DO 170 JDUM = 1,NUM+1 - I0 = JPERM(J) - JPERM(J) = I - IPERM(I) = J - J = PR(J) - IF (J.EQ.-1) GO TO 190 - I = I0 - 170 CONTINUE -C End of dummy loop; this point is never reached - 190 DO 191 KK = UP,N - I = Q(KK) - D(I) = MINONE - L(I) = 0 - 191 CONTINUE - DO 192 KK = LOW,UP-1 - I = Q(KK) - D(I) = MINONE - 192 CONTINUE - DO 193 KK = 1,QLEN - I = Q(KK) - D(I) = MINONE - L(I) = 0 - 193 CONTINUE - - 100 CONTINUE -C End of main loop - -C BV is bottleneck value of final matching - IF (NUM.EQ.N) GO TO 1000 - -C Matrix is structurally singular, complete IPERM. -C JPERM, PR are work arrays - DO 300 J = 1,N - JPERM(J) = 0 - 300 CONTINUE - K = 0 - DO 310 I = 1,N - IF (IPERM(I).EQ.0) THEN - K = K + 1 - PR(K) = I - ELSE - J = IPERM(I) - JPERM(J) = I - ENDIF - 310 CONTINUE - K = 0 - DO 320 I = 1,N - IF (JPERM(I).NE.0) GO TO 320 - K = K + 1 - JDUM = PR(K) - IPERM(JDUM) = I - 320 CONTINUE - - 1000 RETURN - END - -C********************************************************************** - SUBROUTINE MC64DD(I,N,Q,D,L,IWAY) - IMPLICIT NONE -C -C *** Copyright (c) 1999 Council for the Central Laboratory of the -C Research Councils *** -C *** Although every effort has been made to ensure robustness and *** -C *** reliability of the subroutines in this MC64 suite, we *** -C *** disclaim any liability arising through the use or misuse of *** -C *** any of the subroutines. *** -C *** Any problems? Contact ... -C Iain Duff (I.Duff@rl.ac.uk) or Jacko Koster (jak@ii.uib.no) *** -C - INTEGER I,N,IWAY - INTEGER Q(N),L(N) - DOUBLE PRECISION D(N) - -C Variables N,Q,D,L are described in MC64B/BD -C IF IWAY is equal to 1, then -C node I is pushed from its current position upwards -C IF IWAY is not equal to 1, then -C node I is pushed from its current position downwards - -C Local variables and parameters - INTEGER IDUM,K,POS,POSK,QK - PARAMETER (K=2) - DOUBLE PRECISION DI - - DI = D(I) - POS = L(I) -C POS is index of current position of I in the tree - IF (IWAY.EQ.1) THEN - DO 10 IDUM = 1,N - IF (POS.LE.1) GO TO 20 - POSK = POS/K - QK = Q(POSK) - IF (DI.LE.D(QK)) GO TO 20 - Q(POS) = QK - L(QK) = POS - POS = POSK - 10 CONTINUE -C End of dummy loop; this point is never reached - ELSE - DO 15 IDUM = 1,N - IF (POS.LE.1) GO TO 20 - POSK = POS/K - QK = Q(POSK) - IF (DI.GE.D(QK)) GO TO 20 - Q(POS) = QK - L(QK) = POS - POS = POSK - 15 CONTINUE -C End of dummy loop; this point is never reached - ENDIF -C End of dummy if; this point is never reached - 20 Q(POS) = I - L(I) = POS - - RETURN - END - -C********************************************************************** - SUBROUTINE MC64ED(QLEN,N,Q,D,L,IWAY) - IMPLICIT NONE -C -C *** Copyright (c) 1999 Council for the Central Laboratory of the -C Research Councils *** -C *** Although every effort has been made to ensure robustness and *** -C *** reliability of the subroutines in this MC64 suite, we *** -C *** disclaim any liability arising through the use or misuse of *** -C *** any of the subroutines. *** -C *** Any problems? Contact ... -C Iain Duff (I.Duff@rl.ac.uk) or Jacko Koster (jak@ii.uib.no) *** -C - INTEGER QLEN,N,IWAY - INTEGER Q(N),L(N) - DOUBLE PRECISION D(N) - -C Variables QLEN,N,Q,D,L are described in MC64B/BD (IWAY = 1) or -C MC64W/WD (IWAY = 2) -C The root node is deleted from the binary heap. - -C Local variables and parameters - INTEGER I,IDUM,K,POS,POSK - PARAMETER (K=2) - DOUBLE PRECISION DK,DR,DI - -C Move last element to begin of Q - I = Q(QLEN) - DI = D(I) - QLEN = QLEN - 1 - POS = 1 - IF (IWAY.EQ.1) THEN - DO 10 IDUM = 1,N - POSK = K*POS - IF (POSK.GT.QLEN) GO TO 20 - DK = D(Q(POSK)) - IF (POSK.LT.QLEN) THEN - DR = D(Q(POSK+1)) - IF (DK.LT.DR) THEN - POSK = POSK + 1 - DK = DR - ENDIF - ENDIF - IF (DI.GE.DK) GO TO 20 -C Exchange old last element with larger priority child - Q(POS) = Q(POSK) - L(Q(POS)) = POS - POS = POSK - 10 CONTINUE -C End of dummy loop; this point is never reached - ELSE - DO 15 IDUM = 1,N - POSK = K*POS - IF (POSK.GT.QLEN) GO TO 20 - DK = D(Q(POSK)) - IF (POSK.LT.QLEN) THEN - DR = D(Q(POSK+1)) - IF (DK.GT.DR) THEN - POSK = POSK + 1 - DK = DR - ENDIF - ENDIF - IF (DI.LE.DK) GO TO 20 -C Exchange old last element with smaller child - Q(POS) = Q(POSK) - L(Q(POS)) = POS - POS = POSK - 15 CONTINUE -C End of dummy loop; this point is never reached - ENDIF -C End of dummy if; this point is never reached - 20 Q(POS) = I - L(I) = POS - - RETURN - END - -C********************************************************************** - SUBROUTINE MC64FD(POS0,QLEN,N,Q,D,L,IWAY) - IMPLICIT NONE -C -C *** Copyright (c) 1999 Council for the Central Laboratory of the -C Research Councils *** -C *** Although every effort has been made to ensure robustness and *** -C *** reliability of the subroutines in this MC64 suite, we *** -C *** disclaim any liability arising through the use or misuse of *** -C *** any of the subroutines. *** -C *** Any problems? Contact ... -C Iain Duff (I.Duff@rl.ac.uk) or Jacko Koster (jak@ii.uib.no) *** -C - INTEGER POS0,QLEN,N,IWAY - INTEGER Q(N),L(N) - DOUBLE PRECISION D(N) - -C Variables QLEN,N,Q,D,L are described in MC64B/BD (IWAY = 1) or -C MC64WD (IWAY = 2). -C Move last element in the heap - - INTEGER I,IDUM,K,POS,POSK,QK - PARAMETER (K=2) - DOUBLE PRECISION DK,DR,DI - -C Quick return, if possible - IF (QLEN.EQ.POS0) THEN - QLEN = QLEN - 1 - RETURN - ENDIF - -C Move last element from queue Q to position POS0 -C POS is current position of node I in the tree - I = Q(QLEN) - DI = D(I) - QLEN = QLEN - 1 - POS = POS0 - IF (IWAY.EQ.1) THEN - DO 10 IDUM = 1,N - IF (POS.LE.1) GO TO 20 - POSK = POS/K - QK = Q(POSK) - IF (DI.LE.D(QK)) GO TO 20 - Q(POS) = QK - L(QK) = POS - POS = POSK - 10 CONTINUE -C End of dummy loop; this point is never reached - 20 Q(POS) = I - L(I) = POS - DO 30 IDUM = 1,N - POSK = K*POS - IF (POSK.GT.QLEN) GO TO 40 - DK = D(Q(POSK)) - IF (POSK.LT.QLEN) THEN - DR = D(Q(POSK+1)) - IF (DK.LT.DR) THEN - POSK = POSK + 1 - DK = DR - ENDIF - ENDIF - IF (DI.GE.DK) GO TO 40 - QK = Q(POSK) - Q(POS) = QK - L(QK) = POS - POS = POSK - 30 CONTINUE -C End of dummy loop; this point is never reached - ELSE - DO 32 IDUM = 1,N - IF (POS.LE.1) GO TO 34 - POSK = POS/K - QK = Q(POSK) - IF (DI.GE.D(QK)) GO TO 34 - Q(POS) = QK - L(QK) = POS - POS = POSK - 32 CONTINUE -C End of dummy loop; this point is never reached - 34 Q(POS) = I - L(I) = POS - DO 36 IDUM = 1,N - POSK = K*POS - IF (POSK.GT.QLEN) GO TO 40 - DK = D(Q(POSK)) - IF (POSK.LT.QLEN) THEN - DR = D(Q(POSK+1)) - IF (DK.GT.DR) THEN - POSK = POSK + 1 - DK = DR - ENDIF - ENDIF - IF (DI.LE.DK) GO TO 40 - QK = Q(POSK) - Q(POS) = QK - L(QK) = POS - POS = POSK - 36 CONTINUE -C End of dummy loop; this point is never reached - ENDIF -C End of dummy if; this point is never reached - 40 Q(POS) = I - L(I) = POS - - RETURN - END - -C********************************************************************** - SUBROUTINE MC64RD(N,NE,IP,IRN,A) - IMPLICIT NONE -C -C *** Copyright (c) 1999 Council for the Central Laboratory of the -C Research Councils *** -C *** Although every effort has been made to ensure robustness and *** -C *** reliability of the subroutines in this MC64 suite, we *** -C *** disclaim any liability arising through the use or misuse of *** -C *** any of the subroutines. *** -C *** Any problems? Contact ... -C Iain Duff (I.Duff@rl.ac.uk) or Jacko Koster (jak@ii.uib.no) *** -C - INTEGER N,NE - INTEGER IP(N+1),IRN(NE) - DOUBLE PRECISION A(NE) - -C This subroutine sorts the entries in each column of the -C sparse matrix (defined by N,NE,IP,IRN,A) by decreasing -C numerical value. - -C Local constants - INTEGER THRESH,TDLEN - PARAMETER (THRESH=15,TDLEN=50) -C Local variables - INTEGER J,IPJ,K,LEN,R,S,HI,FIRST,MID,LAST,TD - DOUBLE PRECISION HA,KEY -C Local arrays - INTEGER TODO(TDLEN) - - DO 100 J = 1,N - LEN = IP(J+1) - IP(J) - IF (LEN.LE.1) GO TO 100 - IPJ = IP(J) - -C Sort array roughly with partial quicksort - IF (LEN.LT.THRESH) GO TO 400 - TODO(1) = IPJ - TODO(2) = IPJ + LEN - TD = 2 - 500 CONTINUE - FIRST = TODO(TD-1) - LAST = TODO(TD) -C KEY is the smallest of two values present in interval [FIRST,LAST) - KEY = A((FIRST+LAST)/2) - DO 475 K = FIRST,LAST-1 - HA = A(K) - IF (HA.EQ.KEY) GO TO 475 - IF (HA.GT.KEY) GO TO 470 - KEY = HA - GO TO 470 - 475 CONTINUE -C Only one value found in interval, so it is already sorted - TD = TD - 2 - GO TO 425 - -C Reorder interval [FIRST,LAST) such that entries before MID are gt KEY - 470 MID = FIRST - DO 450 K = FIRST,LAST-1 - IF (A(K).LE.KEY) GO TO 450 - HA = A(MID) - A(MID) = A(K) - A(K) = HA - HI = IRN(MID) - IRN(MID) = IRN(K) - IRN(K) = HI - MID = MID + 1 - 450 CONTINUE -C Both subintervals [FIRST,MID), [MID,LAST) are nonempty -C Stack the longest of the two subintervals first - IF (MID-FIRST.GE.LAST-MID) THEN - TODO(TD+2) = LAST - TODO(TD+1) = MID - TODO(TD) = MID -C TODO(TD-1) = FIRST - ELSE - TODO(TD+2) = MID - TODO(TD+1) = FIRST - TODO(TD) = LAST - TODO(TD-1) = MID - ENDIF - TD = TD + 2 - - 425 CONTINUE - IF (TD.EQ.0) GO TO 400 -C There is still work to be done - IF (TODO(TD)-TODO(TD-1).GE.THRESH) GO TO 500 -C Next interval is already short enough for straightforward insertion - TD = TD - 2 - GO TO 425 - -C Complete sorting with straightforward insertion - 400 DO 200 R = IPJ+1,IPJ+LEN-1 - IF (A(R-1) .LT. A(R)) THEN - HA = A(R) - HI = IRN(R) - A(R) = A(R-1) - IRN(R) = IRN(R-1) - DO 300 S = R-1,IPJ+1,-1 - IF (A(S-1) .LT. HA) THEN - A(S) = A(S-1) - IRN(S) = IRN(S-1) - ELSE - A(S) = HA - IRN(S) = HI - GO TO 200 - END IF - 300 CONTINUE - A(IPJ) = HA - IRN(IPJ) = HI - END IF - 200 CONTINUE - - 100 CONTINUE - - RETURN - END - -C********************************************************************** - SUBROUTINE MC64SD(N,NE,IP,IRN,A,IPERM,NUMX, - & W,LEN,LENL,LENH,FC,IW,IW4) - IMPLICIT NONE -C -C *** Copyright (c) 1999 Council for the Central Laboratory of the -C Research Councils *** -C *** Although every effort has been made to ensure robustness and *** -C *** reliability of the subroutines in this MC64 suite, we *** -C *** disclaim any liability arising through the use or misuse of *** -C *** any of the subroutines. *** -C *** Any problems? Contact ... -C Iain Duff (I.Duff@rl.ac.uk) or Jacko Koster (jak@ii.uib.no) *** -C - INTEGER N,NE,NUMX - INTEGER IP(N+1),IRN(NE),IPERM(N), - & W(N),LEN(N),LENL(N),LENH(N),FC(N),IW(N),IW4(4*N) - DOUBLE PRECISION A(NE) - -C N, NE, IP, IRN, are described in MC64A/AD. -C A is a REAL (DOUBLE PRECISION in the D-version) array of length NE. -C A(K), K=1..NE, must be set to the value of the entry that -C corresponds to IRN(k). The entries in each column must be -C non-negative and ordered by decreasing value. -C IPERM is an INTEGER array of length N. On exit, it contains the -C bottleneck matching: IPERM(I) - 0 or row I is matched to column -C IPERM(I). -C NUMX is an INTEGER variable. On exit, it contains the cardinality -C of the matching stored in IPERM. -C IW is an INTEGER work array of length 10N. - -C FC is an integer array of length N that contains the list of -C unmatched columns. -C LEN(J), LENL(J), LENH(J) are integer arrays of length N that point -C to entries in matrix column J. -C In the matrix defined by the column parts IP(J)+LENL(J) we know -C a matching does not exist; in the matrix defined by the column -C parts IP(J)+LENH(J) we know one exists. -C LEN(J) lies between LENL(J) and LENH(J) and determines the matrix -C that is tested for a maximum matching. -C W is an integer array of length N and contains the indices of the -C columns for which LENL ne LENH. -C WLEN is number of indices stored in array W. -C IW is integer work array of length N. -C IW4 is integer work array of length 4N used by MC64U/UD. - - INTEGER NUM,NVAL,WLEN,II,I,J,K,L,CNT,MOD,IDUM1,IDUM2,IDUM3 - DOUBLE PRECISION BVAL,BMIN,BMAX,RINF -c EXTERNAL FD05AD,MC64QD,MC64UD -c DOUBLE PRECISION FD05AD - EXTERNAL MC64QD,MC64UD, DLAMCH - DOUBLE PRECISION DLAMCH - -C BMIN and BMAX are such that a maximum matching exists for the input -C matrix in which all entries smaller than BMIN are dropped. -C For BMAX, a maximum matching does not exist. -C BVAL is a value between BMIN and BMAX. -C CNT is the number of calls made to MC64U/UD so far. -C NUM is the cardinality of last matching found. - -C Set RINF to largest positive real number -c XSL RINF = FD05AD(5) - RINF = DLAMCH('Overflow') - -C Compute a first maximum matching from scratch on whole matrix. - DO 20 J = 1,N - FC(J) = J - IW(J) = 0 - LEN(J) = IP(J+1) - IP(J) - 20 CONTINUE -C The first call to MC64U/UD - CNT = 1 - MOD = 1 - NUMX = 0 - CALL MC64UD(CNT,MOD,N,IRN,NE,IP,LEN,FC,IW,NUMX,N, - & IW4(1),IW4(N+1),IW4(2*N+1),IW4(3*N+1)) - -C IW contains a maximum matching of length NUMX. - NUM = NUMX - - IF (NUM.NE.N) THEN -C Matrix is structurally singular - BMAX = RINF - ELSE -C Matrix is structurally nonsingular, NUM=NUMX=N; -C Set BMAX just above the smallest of all the maximum absolute -C values of the columns - BMAX = RINF - DO 30 J = 1,N - BVAL = 0.0 - DO 25 K = IP(J),IP(J+1)-1 - IF (A(K).GT.BVAL) BVAL = A(K) - 25 CONTINUE - IF (BVAL.LT.BMAX) BMAX = BVAL - 30 CONTINUE - BMAX = 1.001 * BMAX - ENDIF - -C Initialize BVAL,BMIN - BVAL = 0.0 - BMIN = 0.0 -C Initialize LENL,LEN,LENH,W,WLEN according to BMAX. -C Set LEN(J), LENH(J) just after last entry in column J. -C Set LENL(J) just after last entry in column J with value ge BMAX. - WLEN = 0 - DO 48 J = 1,N - L = IP(J+1) - IP(J) - LENH(J) = L - LEN(J) = L - DO 45 K = IP(J),IP(J+1)-1 - IF (A(K).LT.BMAX) GO TO 46 - 45 CONTINUE -C Column J is empty or all entries are ge BMAX - K = IP(J+1) - 46 LENL(J) = K - IP(J) -C Add J to W if LENL(J) ne LENH(J) - IF (LENL(J).EQ.L) GO TO 48 - WLEN = WLEN + 1 - W(WLEN) = J - 48 CONTINUE - -C Main loop - DO 90 IDUM1 = 1,NE - IF (NUM.EQ.NUMX) THEN -C We have a maximum matching in IW; store IW in IPERM - DO 50 I = 1,N - IPERM(I) = IW(I) - 50 CONTINUE -C Keep going round this loop until matching IW is no longer maximum. - DO 80 IDUM2 = 1,NE - BMIN = BVAL - IF (BMAX .EQ. BMIN) GO TO 99 -C Find splitting value BVAL - CALL MC64QD(IP,LENL,LEN,W,WLEN,A,NVAL,BVAL) - IF (NVAL.LE.1) GO TO 99 -C Set LEN such that all matrix entries with value lt BVAL are -C discarded. Store old LEN in LENH. Do this for all columns W(K). -C Each step, either K is incremented or WLEN is decremented. - K = 1 - DO 70 IDUM3 = 1,N - IF (K.GT.WLEN) GO TO 71 - J = W(K) - DO 55 II = IP(J)+LEN(J)-1,IP(J)+LENL(J),-1 - IF (A(II).GE.BVAL) GO TO 60 - I = IRN(II) - IF (IW(I).NE.J) GO TO 55 -C Remove entry from matching - IW(I) = 0 - NUM = NUM - 1 - FC(N-NUM) = J - 55 CONTINUE - 60 LENH(J) = LEN(J) -C IP(J)+LEN(J)-1 is last entry in column ge BVAL - LEN(J) = II - IP(J) + 1 -C If LENH(J) = LENL(J), remove J from W - IF (LENL(J).EQ.LENH(J)) THEN - W(K) = W(WLEN) - WLEN = WLEN - 1 - ELSE - K = K + 1 - ENDIF - 70 CONTINUE - 71 IF (NUM.LT.NUMX) GO TO 81 - 80 CONTINUE -C End of dummy loop; this point is never reached -C Set mode for next call to MC64U/UD - 81 MOD = 1 - ELSE -C We do not have a maximum matching in IW. - BMAX = BVAL -C BMIN is the bottleneck value of a maximum matching; -C for BMAX the matching is not maximum, so BMAX>BMIN -C IF (BMAX .EQ. BMIN) GO TO 99 -C Find splitting value BVAL - CALL MC64QD(IP,LEN,LENH,W,WLEN,A,NVAL,BVAL) - IF (NVAL.EQ.0. OR. BVAL.EQ.BMIN) GO TO 99 -C Set LEN such that all matrix entries with value ge BVAL are -C inside matrix. Store old LEN in LENL. Do this for all columns W(K). -C Each step, either K is incremented or WLEN is decremented. - K = 1 - DO 87 IDUM3 = 1,N - IF (K.GT.WLEN) GO TO 88 - J = W(K) - DO 85 II = IP(J)+LEN(J),IP(J)+LENH(J)-1 - IF (A(II).LT.BVAL) GO TO 86 - 85 CONTINUE - 86 LENL(J) = LEN(J) - LEN(J) = II - IP(J) - IF (LENL(J).EQ.LENH(J)) THEN - W(K) = W(WLEN) - WLEN = WLEN - 1 - ELSE - K = K + 1 - ENDIF - 87 CONTINUE -C End of dummy loop; this point is never reached -C Set mode for next call to MC64U/UD - 88 MOD = 0 - ENDIF - CNT = CNT + 1 - CALL MC64UD(CNT,MOD,N,IRN,NE,IP,LEN,FC,IW,NUM,NUMX, - & IW4(1),IW4(N+1),IW4(2*N+1),IW4(3*N+1)) - -C IW contains maximum matching of length NUM - 90 CONTINUE -C End of dummy loop; this point is never reached - -C BMIN is bottleneck value of final matching - 99 IF (NUMX.EQ.N) GO TO 1000 -C The matrix is structurally singular, complete IPERM -C W, IW are work arrays - DO 300 J = 1,N - W(J) = 0 - 300 CONTINUE - K = 0 - DO 310 I = 1,N - IF (IPERM(I).EQ.0) THEN - K = K + 1 - IW(K) = I - ELSE - J = IPERM(I) - W(J) = I - ENDIF - 310 CONTINUE - K = 0 - DO 320 J = 1,N - IF (W(J).NE.0) GO TO 320 - K = K + 1 - IDUM1 = IW(K) - IPERM(IDUM1) = J - 320 CONTINUE - - 1000 RETURN - END - -C********************************************************************** - SUBROUTINE MC64QD(IP,LENL,LENH,W,WLEN,A,NVAL,VAL) - IMPLICIT NONE -C -C *** Copyright (c) 1999 Council for the Central Laboratory of the -C Research Councils *** -C *** Although every effort has been made to ensure robustness and *** -C *** reliability of the subroutines in this MC64 suite, we *** -C *** disclaim any liability arising through the use or misuse of *** -C *** any of the subroutines. *** -C *** Any problems? Contact ... -C Iain Duff (I.Duff@rl.ac.uk) or Jacko Koster (jak@ii.uib.no) *** -C - INTEGER WLEN,NVAL - INTEGER IP(*),LENL(*),LENH(*),W(*) - DOUBLE PRECISION A(*),VAL - -C This routine searches for at most XX different numerical values -C in the columns W(1:WLEN). XX>=2. -C Each column J is scanned between IP(J)+LENL(J) and IP(J)+LENH(J)-1 -C until XX values are found or all columns have been considered. -C On output, NVAL is the number of different values that is found -C and SPLIT(1:NVAL) contains the values in decreasing order. -C If NVAL > 0, the routine returns VAL = SPLIT((NVAL+1)/2). -C - INTEGER XX,J,K,II,S,POS - PARAMETER (XX=10) - DOUBLE PRECISION SPLIT(XX),HA - -C Scan columns in W(1:WLEN). For each encountered value, if value not -C already present in SPLIT(1:NVAL), insert value such that SPLIT -C remains sorted by decreasing value. -C The sorting is done by straightforward insertion; therefore the use -C of this routine should be avoided for large XX (XX < 20). - NVAL = 0 - DO 10 K = 1,WLEN - J = W(K) - DO 15 II = IP(J)+LENL(J),IP(J)+LENH(J)-1 - HA = A(II) - IF (NVAL.EQ.0) THEN - SPLIT(1) = HA - NVAL = 1 - ELSE -C Check presence of HA in SPLIT - DO 20 S = NVAL,1,-1 - IF (SPLIT(S).EQ.HA) GO TO 15 - IF (SPLIT(S).GT.HA) THEN - POS = S + 1 - GO TO 21 - ENDIF - 20 CONTINUE - POS = 1 -C The insertion - 21 DO 22 S = NVAL,POS,-1 - SPLIT(S+1) = SPLIT(S) - 22 CONTINUE - SPLIT(POS) = HA - NVAL = NVAL + 1 - ENDIF -C Exit loop if XX values are found - IF (NVAL.EQ.XX) GO TO 11 - 15 CONTINUE - 10 CONTINUE -C Determine VAL - 11 IF (NVAL.GT.0) VAL = SPLIT((NVAL+1)/2) - - RETURN - END - -C********************************************************************** - SUBROUTINE MC64UD(ID,MOD,N,IRN,LIRN,IP,LENC,FC,IPERM,NUM,NUMX, - & PR,ARP,CV,OUT) - IMPLICIT NONE -C -C *** Copyright (c) 1999 Council for the Central Laboratory of the -C Research Councils *** -C *** Although every effort has been made to ensure robustness and *** -C *** reliability of the subroutines in this MC64 suite, we *** -C *** disclaim any liability arising through the use or misuse of *** -C *** any of the subroutines. *** -C *** Any problems? Contact ... -C Iain Duff (I.Duff@rl.ac.uk) or Jacko Koster (jak@ii.uib.no) *** -C - INTEGER ID,MOD,N,LIRN,NUM,NUMX - INTEGER ARP(N),CV(N),IRN(LIRN),IP(N), - & FC(N),IPERM(N),LENC(N),OUT(N),PR(N) - -C PR(J) is the previous column to J in the depth first search. -C Array PR is used as workspace in the sorting algorithm. -C Elements (I,IPERM(I)) I=1,..,N are entries at the end of the -C algorithm unless N assignments have not been made in which case -C N-NUM pairs (I,IPERM(I)) will not be entries in the matrix. -C CV(I) is the most recent loop number (ID+JORD) at which row I -C was visited. -C ARP(J) is the number of entries in column J which have been scanned -C when looking for a cheap assignment. -C OUT(J) is one less than the number of entries in column J which have -C not been scanned during one pass through the main loop. -C NUMX is maximum possible size of matching. - - INTEGER I,II,IN1,IN2,J,J1,JORD,K,KK,LAST,NFC, - & NUM0,NUM1,NUM2,ID0,ID1 - - IF (ID.EQ.1) THEN -C The first call to MC64U/UD. -C Initialize CV and ARP; parameters MOD, NUMX are not accessed - DO 5 I = 1,N - CV(I) = 0 - ARP(I) = 0 - 5 CONTINUE - NUM1 = N - NUM2 = N - ELSE -C Not the first call to MC64U/UD. -C Re-initialize ARP if entries were deleted since last call to MC64U/UD - IF (MOD.EQ.1) THEN - DO 8 I = 1,N - ARP(I) = 0 - 8 CONTINUE - ENDIF - NUM1 = NUMX - NUM2 = N - NUMX - ENDIF - NUM0 = NUM - -C NUM0 is size of input matching -C NUM1 is maximum possible size of matching -C NUM2 is maximum allowed number of unassigned rows/columns -C NUM is size of current matching - -C Quick return if possible -C IF (NUM.EQ.N) GO TO 199 -C NFC is number of rows/columns that could not be assigned - NFC = 0 -C Integers ID0+1 to ID0+N are unique numbers for call ID to MC64U/UD, -C so 1st call uses 1..N, 2nd call uses N+1..2N, etc - ID0 = (ID-1)*N - -C Main loop. Each pass round this loop either results in a new -C assignment or gives a column with no assignment - - DO 100 JORD = NUM0+1,N - -C Each pass uses unique number ID1 - ID1 = ID0 + JORD -C J is unmatched column - J = FC(JORD-NUM0) - PR(J) = -1 - DO 70 K = 1,JORD -C Look for a cheap assignment - IF (ARP(J).GE.LENC(J)) GO TO 30 - IN1 = IP(J) + ARP(J) - IN2 = IP(J) + LENC(J) - 1 - DO 20 II = IN1,IN2 - I = IRN(II) - IF (IPERM(I).EQ.0) GO TO 80 - 20 CONTINUE -C No cheap assignment in row - ARP(J) = LENC(J) -C Begin looking for assignment chain starting with row J - 30 OUT(J) = LENC(J) - 1 -C Inner loop. Extends chain by one or backtracks - DO 60 KK = 1,JORD - IN1 = OUT(J) - IF (IN1.LT.0) GO TO 50 - IN2 = IP(J) + LENC(J) - 1 - IN1 = IN2 - IN1 -C Forward scan - DO 40 II = IN1,IN2 - I = IRN(II) - IF (CV(I).EQ.ID1) GO TO 40 -C Column J has not yet been accessed during this pass - J1 = J - J = IPERM(I) - CV(I) = ID1 - PR(J) = J1 - OUT(J1) = IN2 - II - 1 - GO TO 70 - 40 CONTINUE -C Backtracking step. - 50 J1 = PR(J) - IF (J1.EQ.-1) THEN -C No augmenting path exists for column J. - NFC = NFC + 1 - FC(NFC) = J - IF (NFC.GT.NUM2) THEN -C A matching of maximum size NUM1 is not possible - LAST = JORD - GO TO 101 - ENDIF - GO TO 100 - ENDIF - J = J1 - 60 CONTINUE -C End of dummy loop; this point is never reached - 70 CONTINUE -C End of dummy loop; this point is never reached - -C New assignment is made. - 80 IPERM(I) = J - ARP(J) = II - IP(J) + 1 - NUM = NUM + 1 - DO 90 K = 1,JORD - J = PR(J) - IF (J.EQ.-1) GO TO 95 - II = IP(J) + LENC(J) - OUT(J) - 2 - I = IRN(II) - IPERM(I) = J - 90 CONTINUE -C End of dummy loop; this point is never reached - - 95 IF (NUM.EQ.NUM1) THEN -C A matching of maximum size NUM1 is found - LAST = JORD - GO TO 101 - ENDIF -C - 100 CONTINUE - -C All unassigned columns have been considered - LAST = N - -C Now, a transversal is computed or is not possible. -C Complete FC before returning. - 101 DO 110 JORD = LAST+1,N - NFC = NFC + 1 - FC(NFC) = FC(JORD-NUM0) - 110 CONTINUE - -C 199 RETURN - RETURN - END - -C********************************************************************** - SUBROUTINE MC64WD(N,NE,IP,IRN,A,IPERM,NUM, - & JPERM,OUT,PR,Q,L,U,D) - IMPLICIT NONE -C -C *** Copyright (c) 1999 Council for the Central Laboratory of the -C Research Councils *** -C *** Although every effort has been made to ensure robustness and *** -C *** reliability of the subroutines in this MC64 suite, we *** -C *** disclaim any liability arising through the use or misuse of *** -C *** any of the subroutines. *** -C *** Any problems? Contact ... -C Iain Duff (I.Duff@rl.ac.uk) or Jacko Koster (jak@ii.uib.no) *** -C - INTEGER N,NE,NUM - INTEGER IP(N+1),IRN(NE),IPERM(N), - & JPERM(N),OUT(N),PR(N),Q(N),L(N) - DOUBLE PRECISION A(NE),U(N),D(N) - -C N, NE, IP, IRN are described in MC64A/AD. -C A is a REAL (DOUBLE PRECISION in the D-version) array of length NE. -C A(K), K=1..NE, must be set to the value of the entry that -C corresponds to IRN(K). It is not altered. -C All values A(K) must be non-negative. -C IPERM is an INTEGER array of length N. On exit, it contains the -C weighted matching: IPERM(I) = 0 or row I is matched to column -C IPERM(I). -C NUM is an INTEGER variable. On exit, it contains the cardinality of -C the matching stored in IPERM. -C IW is an INTEGER work array of length 5N. -C DW is a REAL (DOUBLE PRECISION in the D-version) array of length 2N. -C On exit, U = D(1:N) contains the dual row variable and -C V = D(N+1:2N) contains the dual column variable. If the matrix -C is structurally nonsingular (NUM = N), the following holds: -C U(I)+V(J) <= A(I,J) if IPERM(I) |= J -C U(I)+V(J) = A(I,J) if IPERM(I) = J -C U(I) = 0 if IPERM(I) = 0 -C V(J) = 0 if there is no I for which IPERM(I) = J - -C Local variables - INTEGER I,I0,II,J,JJ,JORD,Q0,QLEN,JDUM,ISP,JSP, - & K,K0,K1,K2,KK,KK1,KK2,UP,LOW - DOUBLE PRECISION CSP,DI,DMIN,DNEW,DQ0,VJ -C Local parameters - DOUBLE PRECISION RINF,ZERO - PARAMETER (ZERO=0.0D+0) -C External subroutines and/or functions -c EXTERNAL FD05AD,MC64DD,MC64ED,MC64FD -c DOUBLE PRECISION FD05AD - EXTERNAL MC64DD,MC64ED,MC64FD, DLAMCH - DOUBLE PRECISION DLAMCH - - -C Set RINF to largest positive real number -c XSL RINF = FD05AD(5) - RINF = DLAMCH('Overflow') - -C Initialization - NUM = 0 - DO 10 K = 1,N - U(K) = RINF - D(K) = ZERO - IPERM(K) = 0 - JPERM(K) = 0 - PR(K) = IP(K) - L(K) = 0 - 10 CONTINUE -C Initialize U(I) - DO 30 J = 1,N - DO 20 K = IP(J),IP(J+1)-1 - I = IRN(K) - IF (A(K).GT.U(I)) GO TO 20 - U(I) = A(K) - IPERM(I) = J - L(I) = K - 20 CONTINUE - 30 CONTINUE - DO 40 I = 1,N - J = IPERM(I) - IF (J.EQ.0) GO TO 40 -C Row I is not empty - IPERM(I) = 0 - IF (JPERM(J).NE.0) GO TO 40 -C Assignment of column J to row I - NUM = NUM + 1 - IPERM(I) = J - JPERM(J) = L(I) - 40 CONTINUE - IF (NUM.EQ.N) GO TO 1000 -C Scan unassigned columns; improve assignment - DO 95 J = 1,N -C JPERM(J) ne 0 iff column J is already assigned - IF (JPERM(J).NE.0) GO TO 95 - K1 = IP(J) - K2 = IP(J+1) - 1 -C Continue only if column J is not empty - IF (K1.GT.K2) GO TO 95 - VJ = RINF - DO 50 K = K1,K2 - I = IRN(K) - DI = A(K) - U(I) - IF (DI.GT.VJ) GO TO 50 - IF (DI.LT.VJ .OR. DI.EQ.RINF) GO TO 55 - IF (IPERM(I).NE.0 .OR. IPERM(I0).EQ.0) GO TO 50 - 55 VJ = DI - I0 = I - K0 = K - 50 CONTINUE - D(J) = VJ - K = K0 - I = I0 - IF (IPERM(I).EQ.0) GO TO 90 - DO 60 K = K0,K2 - I = IRN(K) - IF (A(K)-U(I).GT.VJ) GO TO 60 - JJ = IPERM(I) -C Scan remaining part of assigned column JJ - KK1 = PR(JJ) - KK2 = IP(JJ+1) - 1 - IF (KK1.GT.KK2) GO TO 60 - DO 70 KK = KK1,KK2 - II = IRN(KK) - IF (IPERM(II).GT.0) GO TO 70 - IF (A(KK)-U(II).LE.D(JJ)) GO TO 80 - 70 CONTINUE - PR(JJ) = KK2 + 1 - 60 CONTINUE - GO TO 95 - 80 JPERM(JJ) = KK - IPERM(II) = JJ - PR(JJ) = KK + 1 - 90 NUM = NUM + 1 - JPERM(J) = K - IPERM(I) = J - PR(J) = K + 1 - 95 CONTINUE - IF (NUM.EQ.N) GO TO 1000 - -C Prepare for main loop - DO 99 I = 1,N - D(I) = RINF - L(I) = 0 - 99 CONTINUE - -C Main loop ... each pass round this loop is similar to Dijkstra's -C algorithm for solving the single source shortest path problem - - DO 100 JORD = 1,N - - IF (JPERM(JORD).NE.0) GO TO 100 -C JORD is next unmatched column -C DMIN is the length of shortest path in the tree - DMIN = RINF - QLEN = 0 - LOW = N + 1 - UP = N + 1 -C CSP is the cost of the shortest augmenting path to unassigned row -C IRN(ISP). The corresponding column index is JSP. - CSP = RINF -C Build shortest path tree starting from unassigned column (root) JORD - J = JORD - PR(J) = -1 - -C Scan column J - DO 115 K = IP(J),IP(J+1)-1 - I = IRN(K) - DNEW = A(K) - U(I) - IF (DNEW.GE.CSP) GO TO 115 - IF (IPERM(I).EQ.0) THEN - CSP = DNEW - ISP = K - JSP = J - ELSE - IF (DNEW.LT.DMIN) DMIN = DNEW - D(I) = DNEW - QLEN = QLEN + 1 - Q(QLEN) = K - ENDIF - 115 CONTINUE -C Initialize heap Q and Q2 with rows held in Q(1:QLEN) - Q0 = QLEN - QLEN = 0 - DO 120 KK = 1,Q0 - K = Q(KK) - I = IRN(K) - IF (CSP.LE.D(I)) THEN - D(I) = RINF - GO TO 120 - ENDIF - IF (D(I).LE.DMIN) THEN - LOW = LOW - 1 - Q(LOW) = I - L(I) = LOW - ELSE - QLEN = QLEN + 1 - L(I) = QLEN - CALL MC64DD(I,N,Q,D,L,2) - ENDIF -C Update tree - JJ = IPERM(I) - OUT(JJ) = K - PR(JJ) = J - 120 CONTINUE - - DO 150 JDUM = 1,NUM - -C If Q2 is empty, extract rows from Q - IF (LOW.EQ.UP) THEN - IF (QLEN.EQ.0) GO TO 160 - I = Q(1) - IF (D(I).GE.CSP) GO TO 160 - DMIN = D(I) - 152 CALL MC64ED(QLEN,N,Q,D,L,2) - LOW = LOW - 1 - Q(LOW) = I - L(I) = LOW - IF (QLEN.EQ.0) GO TO 153 - I = Q(1) - IF (D(I).GT.DMIN) GO TO 153 - GO TO 152 - ENDIF -C Q0 is row whose distance D(Q0) to the root is smallest - 153 Q0 = Q(UP-1) - DQ0 = D(Q0) -C Exit loop if path to Q0 is longer than the shortest augmenting path - IF (DQ0.GE.CSP) GO TO 160 - UP = UP - 1 - -C Scan column that matches with row Q0 - J = IPERM(Q0) - VJ = DQ0 - A(JPERM(J)) + U(Q0) - DO 155 K = IP(J),IP(J+1)-1 - I = IRN(K) - IF (L(I).GE.UP) GO TO 155 -C DNEW is new cost - DNEW = VJ + A(K)-U(I) -C Do not update D(I) if DNEW ge cost of shortest path - IF (DNEW.GE.CSP) GO TO 155 - IF (IPERM(I).EQ.0) THEN -C Row I is unmatched; update shortest path info - CSP = DNEW - ISP = K - JSP = J - ELSE -C Row I is matched; do not update D(I) if DNEW is larger - DI = D(I) - IF (DI.LE.DNEW) GO TO 155 - IF (L(I).GE.LOW) GO TO 155 - D(I) = DNEW - IF (DNEW.LE.DMIN) THEN - IF (L(I).NE.0) - * CALL MC64FD(L(I),QLEN,N,Q,D,L,2) - LOW = LOW - 1 - Q(LOW) = I - L(I) = LOW - ELSE - IF (L(I).EQ.0) THEN - QLEN = QLEN + 1 - L(I) = QLEN - ENDIF - CALL MC64DD(I,N,Q,D,L,2) - ENDIF -C Update tree - JJ = IPERM(I) - OUT(JJ) = K - PR(JJ) = J - ENDIF - 155 CONTINUE - 150 CONTINUE - -C If CSP = RINF, no augmenting path is found - 160 IF (CSP.EQ.RINF) GO TO 190 -C Find augmenting path by tracing backward in PR; update IPERM,JPERM - NUM = NUM + 1 - I = IRN(ISP) - IPERM(I) = JSP - JPERM(JSP) = ISP - J = JSP - DO 170 JDUM = 1,NUM - JJ = PR(J) - IF (JJ.EQ.-1) GO TO 180 - K = OUT(J) - I = IRN(K) - IPERM(I) = JJ - JPERM(JJ) = K - J = JJ - 170 CONTINUE -C End of dummy loop; this point is never reached - -C Update U for rows in Q(UP:N) - 180 DO 185 KK = UP,N - I = Q(KK) - U(I) = U(I) + D(I) - CSP - 185 CONTINUE - 190 DO 191 KK = LOW,N - I = Q(KK) - D(I) = RINF - L(I) = 0 - 191 CONTINUE - DO 193 KK = 1,QLEN - I = Q(KK) - D(I) = RINF - L(I) = 0 - 193 CONTINUE - - 100 CONTINUE -C End of main loop - - -C Set dual column variable in D(1:N) - 1000 DO 200 J = 1,N - K = JPERM(J) - IF (K.NE.0) THEN - D(J) = A(K) - U(IRN(K)) - ELSE - D(J) = ZERO - ENDIF - IF (IPERM(J).EQ.0) U(J) = ZERO - 200 CONTINUE - - IF (NUM.EQ.N) GO TO 1100 - -C The matrix is structurally singular, complete IPERM. -C JPERM, OUT are work arrays - DO 300 J = 1,N - JPERM(J) = 0 - 300 CONTINUE - K = 0 - DO 310 I = 1,N - IF (IPERM(I).EQ.0) THEN - K = K + 1 - OUT(K) = I - ELSE - J = IPERM(I) - JPERM(J) = I - ENDIF - 310 CONTINUE - K = 0 - DO 320 J = 1,N - IF (JPERM(J).NE.0) GO TO 320 - K = K + 1 - JDUM = OUT(K) - IPERM(JDUM) = J - 320 CONTINUE - 1100 RETURN - END - diff --git a/thirdparty/superlu/SuperLU_4.1/SRC/sgsitrf.c.bak b/thirdparty/superlu/SuperLU_4.1/SRC/sgsitrf.c.bak deleted file mode 100644 index 3dfaadd..0000000 --- a/thirdparty/superlu/SuperLU_4.1/SRC/sgsitrf.c.bak +++ /dev/null @@ -1,626 +0,0 @@ - -/*! @file sgsitf.c - * \brief Computes an ILU factorization of a general sparse matrix - * - * <pre> - * -- SuperLU routine (version 4.0) -- - * Lawrence Berkeley National Laboratory. - * June 30, 2009 - * </pre> - */ - -#include "slu_sdefs.h" - -#ifdef DEBUG -int num_drop_L; -#endif - -/*! \brief - * - * <pre> - * Purpose - * ======= - * - * SGSITRF computes an ILU factorization of a general sparse m-by-n - * matrix A using partial pivoting with row interchanges. - * The factorization has the form - * Pr * A = L * U - * where Pr is a row permutation matrix, L is lower triangular with unit - * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper - * triangular (upper trapezoidal if A->nrow < A->ncol). - * - * See supermatrix.h for the definition of 'SuperMatrix' structure. - * - * Arguments - * ========= - * - * options (input) superlu_options_t* - * The structure defines the input parameters to control - * how the ILU decomposition will be performed. - * - * A (input) SuperMatrix* - * Original matrix A, permuted by columns, of dimension - * (A->nrow, A->ncol). The type of A can be: - * Stype = SLU_NCP; Dtype = SLU_S; Mtype = SLU_GE. - * - * relax (input) int - * To control degree of relaxing supernodes. If the number - * of nodes (columns) in a subtree of the elimination tree is less - * than relax, this subtree is considered as one supernode, - * regardless of the row structures of those columns. - * - * panel_size (input) int - * A panel consists of at most panel_size consecutive columns. - * - * etree (input) int*, dimension (A->ncol) - * Elimination tree of A'*A. - * Note: etree is a vector of parent pointers for a forest whose - * vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol. - * On input, the columns of A should be permuted so that the - * etree is in a certain postorder. - * - * work (input/output) void*, size (lwork) (in bytes) - * User-supplied work space and space for the output data structures. - * Not referenced if lwork = 0; - * - * lwork (input) int - * Specifies the size of work array in bytes. - * = 0: allocate space internally by system malloc; - * > 0: use user-supplied work array of length lwork in bytes, - * returns error if space runs out. - * = -1: the routine guesses the amount of space needed without - * performing the factorization, and returns it in - * *info; no other side effects. - * - * perm_c (input) int*, dimension (A->ncol) - * Column permutation vector, which defines the - * permutation matrix Pc; perm_c[i] = j means column i of A is - * in position j in A*Pc. - * When searching for diagonal, perm_c[*] is applied to the - * row subscripts of A, so that diagonal threshold pivoting - * can find the diagonal of A, rather than that of A*Pc. - * - * perm_r (input/output) int*, dimension (A->nrow) - * Row permutation vector which defines the permutation matrix Pr, - * perm_r[i] = j means row i of A is in position j in Pr*A. - * If options->Fact = SamePattern_SameRowPerm, the pivoting routine - * will try to use the input perm_r, unless a certain threshold - * criterion is violated. In that case, perm_r is overwritten by - * a new permutation determined by partial pivoting or diagonal - * threshold pivoting. - * Otherwise, perm_r is output argument; - * - * L (output) SuperMatrix* - * The factor L from the factorization Pr*A=L*U; use compressed row - * subscripts storage for supernodes, i.e., L has type: - * Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU. - * - * U (output) SuperMatrix* - * The factor U from the factorization Pr*A*Pc=L*U. Use column-wise - * storage scheme, i.e., U has types: Stype = SLU_NC, - * Dtype = SLU_S, Mtype = SLU_TRU. - * - * stat (output) SuperLUStat_t* - * Record the statistics on runtime and floating-point operation count. - * See slu_util.h for the definition of 'SuperLUStat_t'. - * - * info (output) int* - * = 0: successful exit - * < 0: if info = -i, the i-th argument had an illegal value - * > 0: if info = i, and i is - * <= A->ncol: number of zero pivots. They are replaced by small - * entries according to options->ILU_FillTol. - * > A->ncol: number of bytes allocated when memory allocation - * failure occurred, plus A->ncol. If lwork = -1, it is - * the estimated amount of space needed, plus A->ncol. - * - * ====================================================================== - * - * Local Working Arrays: - * ====================== - * m = number of rows in the matrix - * n = number of columns in the matrix - * - * marker[0:3*m-1]: marker[i] = j means that node i has been - * reached when working on column j. - * Storage: relative to original row subscripts - * NOTE: There are 4 of them: - * marker/marker1 are used for panel dfs, see (ilu_)dpanel_dfs.c; - * marker2 is used for inner-factorization, see (ilu)_dcolumn_dfs.c; - * marker_relax(has its own space) is used for relaxed supernodes. - * - * parent[0:m-1]: parent vector used during dfs - * Storage: relative to new row subscripts - * - * xplore[0:m-1]: xplore[i] gives the location of the next (dfs) - * unexplored neighbor of i in lsub[*] - * - * segrep[0:nseg-1]: contains the list of supernodal representatives - * in topological order of the dfs. A supernode representative is the - * last column of a supernode. - * The maximum size of segrep[] is n. - * - * repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a - * supernodal representative r, repfnz[r] is the location of the first - * nonzero in this segment. It is also used during the dfs: repfnz[r]>0 - * indicates the supernode r has been explored. - * NOTE: There are W of them, each used for one column of a panel. - * - * panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below - * the panel diagonal. These are filled in during dpanel_dfs(), and are - * used later in the inner LU factorization within the panel. - * panel_lsub[]/dense[] pair forms the SPA data structure. - * NOTE: There are W of them. - * - * dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values; - * NOTE: there are W of them. - * - * tempv[0:*]: real temporary used for dense numeric kernels; - * The size of this array is defined by NUM_TEMPV() in slu_util.h. - * It is also used by the dropping routine ilu_ddrop_row(). - * </pre> - */ - -void -sgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size, - int *etree, void *work, int lwork, int *perm_c, int *perm_r, - SuperMatrix *L, SuperMatrix *U, SuperLUStat_t *stat, int *info) -{ - /* Local working arrays */ - NCPformat *Astore; - int *iperm_r = NULL; /* inverse of perm_r; used when - options->Fact == SamePattern_SameRowPerm */ - int *iperm_c; /* inverse of perm_c */ - int *swap, *iswap; /* swap is used to store the row permutation - during the factorization. Initially, it is set - to iperm_c (row indeces of Pc*A*Pc'). - iswap is the inverse of swap. After the - factorization, it is equal to perm_r. */ - int *iwork; - float *swork; - int *segrep, *repfnz, *parent, *xplore; - int *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */ - int *marker, *marker_relax; - float *dense, *tempv; - int *relax_end, *relax_fsupc; - float *a; - int *asub; - int *xa_begin, *xa_end; - int *xsup, *supno; - int *xlsub, *xlusup, *xusub; - int nzlumax; - float *amax; - float drop_sum; - static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */ - int *iwork2; /* used by the second dropping rule */ - - /* Local scalars */ - fact_t fact = options->Fact; - double diag_pivot_thresh = options->DiagPivotThresh; - double drop_tol = options->ILU_DropTol; /* tau */ - double fill_ini = options->ILU_FillTol; /* tau^hat */ - double gamma = options->ILU_FillFactor; - int drop_rule = options->ILU_DropRule; - milu_t milu = options->ILU_MILU; - double fill_tol; - int pivrow; /* pivotal row number in the original matrix A */ - int nseg1; /* no of segments in U-column above panel row jcol */ - int nseg; /* no of segments in each U-column */ - register int jcol; - register int kcol; /* end column of a relaxed snode */ - register int icol; - register int i, k, jj, new_next, iinfo; - int m, n, min_mn, jsupno, fsupc, nextlu, nextu; - int w_def; /* upper bound on panel width */ - int usepr, iperm_r_allocated = 0; - int nnzL, nnzU; - int *panel_histo = stat->panel_histo; - flops_t *ops = stat->ops; - - int last_drop;/* the last column which the dropping rules applied */ - int quota; - int nnzAj; /* number of nonzeros in A(:,1:j) */ - int nnzLj, nnzUj; - double tol_L = drop_tol, tol_U = drop_tol; - float zero = 0.0; - - /* Executable */ - iinfo = 0; - m = A->nrow; - n = A->ncol; - min_mn = SUPERLU_MIN(m, n); - Astore = A->Store; - a = Astore->nzval; - asub = Astore->rowind; - xa_begin = Astore->colbeg; - xa_end = Astore->colend; - - /* Allocate storage common to the factor routines */ - *info = sLUMemInit(fact, work, lwork, m, n, Astore->nnz, panel_size, - gamma, L, U, &Glu, &iwork, &swork); - if ( *info ) return; - - xsup = Glu.xsup; - supno = Glu.supno; - xlsub = Glu.xlsub; - xlusup = Glu.xlusup; - xusub = Glu.xusub; - - SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore, - &repfnz, &panel_lsub, &marker_relax, &marker); - sSetRWork(m, panel_size, swork, &dense, &tempv); - - usepr = (fact == SamePattern_SameRowPerm); - if ( usepr ) { - /* Compute the inverse of perm_r */ - iperm_r = (int *) intMalloc(m); - for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k; - iperm_r_allocated = 1; - } - - iperm_c = (int *) intMalloc(n); - for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k; - swap = (int *)intMalloc(n); - for (k = 0; k < n; k++) swap[k] = iperm_c[k]; - iswap = (int *)intMalloc(n); - for (k = 0; k < n; k++) iswap[k] = perm_c[k]; - amax = (float *) floatMalloc(panel_size); - if (drop_rule & DROP_SECONDARY) - iwork2 = (int *)intMalloc(n); - else - iwork2 = NULL; - - nnzAj = 0; - nnzLj = 0; - nnzUj = 0; - last_drop = SUPERLU_MAX(min_mn - 2 * sp_ienv(7), (int)(min_mn * 0.95)); - - /* Identify relaxed snodes */ - relax_end = (int *) intMalloc(n); - relax_fsupc = (int *) intMalloc(n); - if ( options->SymmetricMode == YES ) - ilu_heap_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc); - else - ilu_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc); - - ifill (perm_r, m, EMPTY); - ifill (marker, m * NO_MARKER, EMPTY); - supno[0] = -1; - xsup[0] = xlsub[0] = xusub[0] = xlusup[0] = 0; - w_def = panel_size; - - /* Mark the rows used by relaxed supernodes */ - ifill (marker_relax, m, EMPTY); - i = mark_relax(m, relax_end, relax_fsupc, xa_begin, xa_end, - asub, marker_relax); -#if ( PRNTlevel >= 1) - printf("%d relaxed supernodes.\n", i); -#endif - - /* - * Work on one "panel" at a time. A panel is one of the following: - * (a) a relaxed supernode at the bottom of the etree, or - * (b) panel_size contiguous columns, defined by the user - */ - for (jcol = 0; jcol < min_mn; ) { - - if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */ - kcol = relax_end[jcol]; /* end of the relaxed snode */ - panel_histo[kcol-jcol+1]++; - - /* Drop small rows in the previous supernode. */ - if (jcol > 0 && jcol < last_drop) { - int first = xsup[supno[jcol - 1]]; - int last = jcol - 1; - int quota; - - /* Compute the quota */ - if (drop_rule & DROP_PROWS) - quota = gamma * Astore->nnz / m * (m - first) / m - * (last - first + 1); - else if (drop_rule & DROP_COLUMN) { - int i; - quota = 0; - for (i = first; i <= last; i++) - quota += xa_end[i] - xa_begin[i]; - quota = gamma * quota * (m - first) / m; - } else if (drop_rule & DROP_AREA) - quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m) - - nnzLj; - else - quota = m * n; - fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / min_mn); - - /* Drop small rows */ - i = ilu_sdrop_row(options, first, last, tol_L, quota, &nnzLj, - &fill_tol, &Glu, tempv, iwork2, 0); - /* Reset the parameters */ - if (drop_rule & DROP_DYNAMIC) { - if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m) - < nnzLj) - tol_L = SUPERLU_MIN(1.0, tol_L * 2.0); - else - tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5); - } - if (fill_tol < 0) iinfo -= (int)fill_tol; -#ifdef DEBUG - num_drop_L += i * (last - first + 1); -#endif - } - - /* -------------------------------------- - * Factorize the relaxed supernode(jcol:kcol) - * -------------------------------------- */ - /* Determine the union of the row structure of the snode */ - if ( (*info = ilu_ssnode_dfs(jcol, kcol, asub, xa_begin, xa_end, - marker, &Glu)) != 0 ) - return; - - nextu = xusub[jcol]; - nextlu = xlusup[jcol]; - jsupno = supno[jcol]; - fsupc = xsup[jsupno]; - new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1); - nzlumax = Glu.nzlumax; - while ( new_next > nzlumax ) { - if ((*info = sLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu))) - return; - } - - for (icol = jcol; icol <= kcol; icol++) { - xusub[icol+1] = nextu; - - amax[0] = 0.0; - /* Scatter into SPA dense[*] */ - for (k = xa_begin[icol]; k < xa_end[icol]; k++) { - register float tmp = fabs(a[k]); - if (tmp > amax[0]) amax[0] = tmp; - dense[asub[k]] = a[k]; - } - nnzAj += xa_end[icol] - xa_begin[icol]; - if (amax[0] == 0.0) { - amax[0] = fill_ini; -#if ( PRNTlevel >= 1) - printf("Column %d is entirely zero!\n", icol); - fflush(stdout); -#endif - } - - /* Numeric update within the snode */ - ssnode_bmod(icol, jsupno, fsupc, dense, tempv, &Glu, stat); - - if (usepr) pivrow = iperm_r[icol]; - fill_tol = pow(fill_ini, 1.0 - (double)icol / (double)min_mn); - if ( (*info = ilu_spivotL(icol, diag_pivot_thresh, &usepr, - perm_r, iperm_c[icol], swap, iswap, - marker_relax, &pivrow, - amax[0] * fill_tol, milu, zero, - &Glu, stat)) ) { - iinfo++; - marker[pivrow] = kcol; - } - - } - - jcol = kcol + 1; - - } else { /* Work on one panel of panel_size columns */ - - /* Adjust panel_size so that a panel won't overlap with the next - * relaxed snode. - */ - panel_size = w_def; - for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++) - if ( relax_end[k] != EMPTY ) { - panel_size = k - jcol; - break; - } - if ( k == min_mn ) panel_size = min_mn - jcol; - panel_histo[panel_size]++; - - /* symbolic factor on a panel of columns */ - ilu_spanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1, - dense, amax, panel_lsub, segrep, repfnz, - marker, parent, xplore, &Glu); - - /* numeric sup-panel updates in topological order */ - spanel_bmod(m, panel_size, jcol, nseg1, dense, - tempv, segrep, repfnz, &Glu, stat); - - /* Sparse LU within the panel, and below panel diagonal */ - for (jj = jcol; jj < jcol + panel_size; jj++) { - - k = (jj - jcol) * m; /* column index for w-wide arrays */ - - nseg = nseg1; /* Begin after all the panel segments */ - - nnzAj += xa_end[jj] - xa_begin[jj]; - - if ((*info = ilu_scolumn_dfs(m, jj, perm_r, &nseg, - &panel_lsub[k], segrep, &repfnz[k], - marker, parent, xplore, &Glu))) - return; - - /* Numeric updates */ - if ((*info = scolumn_bmod(jj, (nseg - nseg1), &dense[k], - tempv, &segrep[nseg1], &repfnz[k], - jcol, &Glu, stat)) != 0) return; - - /* Make a fill-in position if the column is entirely zero */ - if (xlsub[jj + 1] == xlsub[jj]) { - register int i, row; - int nextl; - int nzlmax = Glu.nzlmax; - int *lsub = Glu.lsub; - int *marker2 = marker + 2 * m; - - /* Allocate memory */ - nextl = xlsub[jj] + 1; - if (nextl >= nzlmax) { - int error = sLUMemXpand(jj, nextl, LSUB, &nzlmax, &Glu); - if (error) { *info = error; return; } - lsub = Glu.lsub; - } - xlsub[jj + 1]++; - assert(xlusup[jj]==xlusup[jj+1]); - xlusup[jj + 1]++; - Glu.lusup[xlusup[jj]] = zero; - - /* Choose a row index (pivrow) for fill-in */ - for (i = jj; i < n; i++) - if (marker_relax[swap[i]] <= jj) break; - row = swap[i]; - marker2[row] = jj; - lsub[xlsub[jj]] = row; -#ifdef DEBUG - printf("Fill col %d.\n", jj); - fflush(stdout); -#endif - } - - /* Computer the quota */ - if (drop_rule & DROP_PROWS) - quota = gamma * Astore->nnz / m * jj / m; - else if (drop_rule & DROP_COLUMN) - quota = gamma * (xa_end[jj] - xa_begin[jj]) * - (jj + 1) / m; - else if (drop_rule & DROP_AREA) - quota = gamma * 0.9 * nnzAj * 0.5 - nnzUj; - else - quota = m; - - /* Copy the U-segments to ucol[*] and drop small entries */ - if ((*info = ilu_scopy_to_ucol(jj, nseg, segrep, &repfnz[k], - perm_r, &dense[k], drop_rule, - milu, amax[jj - jcol] * tol_U, - quota, &drop_sum, &nnzUj, &Glu, - iwork2)) != 0) - return; - - /* Reset the dropping threshold if required */ - if (drop_rule & DROP_DYNAMIC) { - if (gamma * 0.9 * nnzAj * 0.5 < nnzLj) - tol_U = SUPERLU_MIN(1.0, tol_U * 2.0); - else - tol_U = SUPERLU_MAX(drop_tol, tol_U * 0.5); - } - - drop_sum *= MILU_ALPHA; - if (usepr) pivrow = iperm_r[jj]; - fill_tol = pow(fill_ini, 1.0 - (double)jj / (double)min_mn); - if ( (*info = ilu_spivotL(jj, diag_pivot_thresh, &usepr, perm_r, - iperm_c[jj], swap, iswap, - marker_relax, &pivrow, - amax[jj - jcol] * fill_tol, milu, - drop_sum, &Glu, stat)) ) { - iinfo++; - marker[m + pivrow] = jj; - marker[2 * m + pivrow] = jj; - } - - /* Reset repfnz[] for this column */ - resetrep_col (nseg, segrep, &repfnz[k]); - - /* Start a new supernode, drop the previous one */ - if (jj > 0 && supno[jj] > supno[jj - 1] && jj < last_drop) { - int first = xsup[supno[jj - 1]]; - int last = jj - 1; - int quota; - - /* Compute the quota */ - if (drop_rule & DROP_PROWS) - quota = gamma * Astore->nnz / m * (m - first) / m - * (last - first + 1); - else if (drop_rule & DROP_COLUMN) { - int i; - quota = 0; - for (i = first; i <= last; i++) - quota += xa_end[i] - xa_begin[i]; - quota = gamma * quota * (m - first) / m; - } else if (drop_rule & DROP_AREA) - quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) - / m) - nnzLj; - else - quota = m * n; - fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / - (double)min_mn); - - /* Drop small rows */ - i = ilu_sdrop_row(options, first, last, tol_L, quota, - &nnzLj, &fill_tol, &Glu, tempv, iwork2, - 1); - - /* Reset the parameters */ - if (drop_rule & DROP_DYNAMIC) { - if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m) - < nnzLj) - tol_L = SUPERLU_MIN(1.0, tol_L * 2.0); - else - tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5); - } - if (fill_tol < 0) iinfo -= (int)fill_tol; -#ifdef DEBUG - num_drop_L += i * (last - first + 1); -#endif - } /* if start a new supernode */ - - } /* for */ - - jcol += panel_size; /* Move to the next panel */ - - } /* else */ - - } /* for */ - - *info = iinfo; - - if ( m > n ) { - k = 0; - for (i = 0; i < m; ++i) - if ( perm_r[i] == EMPTY ) { - perm_r[i] = n + k; - ++k; - } - } - - ilu_countnz(min_mn, &nnzL, &nnzU, &Glu); - fixupL(min_mn, perm_r, &Glu); - - sLUWorkFree(iwork, swork, &Glu); /* Free work space and compress storage */ - - if ( fact == SamePattern_SameRowPerm ) { - /* L and U structures may have changed due to possibly different - pivoting, even though the storage is available. - There could also be memory expansions, so the array locations - may have changed, */ - ((SCformat *)L->Store)->nnz = nnzL; - ((SCformat *)L->Store)->nsuper = Glu.supno[n]; - ((SCformat *)L->Store)->nzval = Glu.lusup; - ((SCformat *)L->Store)->nzval_colptr = Glu.xlusup; - ((SCformat *)L->Store)->rowind = Glu.lsub; - ((SCformat *)L->Store)->rowind_colptr = Glu.xlsub; - ((NCformat *)U->Store)->nnz = nnzU; - ((NCformat *)U->Store)->nzval = Glu.ucol; - ((NCformat *)U->Store)->rowind = Glu.usub; - ((NCformat *)U->Store)->colptr = Glu.xusub; - } else { - sCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup, - Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno, - Glu.xsup, SLU_SC, SLU_S, SLU_TRLU); - sCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol, - Glu.usub, Glu.xusub, SLU_NC, SLU_S, SLU_TRU); - } - - ops[FACT] += ops[TRSV] + ops[GEMV]; - stat->expansions = --(Glu.num_expansions); - - if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r); - SUPERLU_FREE (iperm_c); - SUPERLU_FREE (relax_end); - SUPERLU_FREE (swap); - SUPERLU_FREE (iswap); - SUPERLU_FREE (relax_fsupc); - SUPERLU_FREE (amax); - if ( iwork2 ) SUPERLU_FREE (iwork2); - -} diff --git a/thirdparty/superlu/SuperLU_4.1/SRC/slacon.c.bak b/thirdparty/superlu/SuperLU_4.1/SRC/slacon.c.bak deleted file mode 100644 index 4e02fdc..0000000 --- a/thirdparty/superlu/SuperLU_4.1/SRC/slacon.c.bak +++ /dev/null @@ -1,236 +0,0 @@ - -/*! @file slacon.c - * \brief Estimates the 1-norm - * - * <pre> - * -- SuperLU routine (version 2.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * November 15, 1997 - * </pre> - */ -#include <math.h> -#include "slu_Cnames.h" - -/*! \brief - * - * <pre> - * Purpose - * ======= - * - * SLACON estimates the 1-norm of a square matrix A. - * Reverse communication is used for evaluating matrix-vector products. - * - * - * Arguments - * ========= - * - * N (input) INT - * The order of the matrix. N >= 1. - * - * V (workspace) FLOAT PRECISION array, dimension (N) - * On the final return, V = A*W, where EST = norm(V)/norm(W) - * (W is not returned). - * - * X (input/output) FLOAT PRECISION array, dimension (N) - * On an intermediate return, X should be overwritten by - * A * X, if KASE=1, - * A' * X, if KASE=2, - * and SLACON must be re-called with all the other parameters - * unchanged. - * - * ISGN (workspace) INT array, dimension (N) - * - * EST (output) FLOAT PRECISION - * An estimate (a lower bound) for norm(A). - * - * KASE (input/output) INT - * On the initial call to SLACON, KASE should be 0. - * On an intermediate return, KASE will be 1 or 2, indicating - * whether X should be overwritten by A * X or A' * X. - * On the final return from SLACON, KASE will again be 0. - * - * Further Details - * ======= ======= - * - * Contributed by Nick Higham, University of Manchester. - * Originally named CONEST, dated March 16, 1988. - * - * Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of - * a real or complex matrix, with applications to condition estimation", - * ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. - * ===================================================================== - * </pre> - */ - -int -slacon_(int *n, float *v, float *x, int *isgn, float *est, int *kase) - -{ - - - /* Table of constant values */ - int c__1 = 1; - float zero = 0.0; - float one = 1.0; - - /* Local variables */ - static int iter; - static int jump, jlast; - static float altsgn, estold; - static int i, j; - float temp; -#ifdef _CRAY - extern int ISAMAX(int *, float *, int *); - extern float SASUM(int *, float *, int *); - extern int SCOPY(int *, float *, int *, float *, int *); -#else - extern int isamax_(int *, float *, int *); - extern float sasum_(int *, float *, int *); - extern int scopy_(int *, float *, int *, float *, int *); -#endif -#define d_sign(a, b) (b >= 0 ? fabs(a) : -fabs(a)) /* Copy sign */ -#define i_dnnt(a) \ - ( a>=0 ? floor(a+.5) : -floor(.5-a) ) /* Round to nearest integer */ - - if ( *kase == 0 ) { - for (i = 0; i < *n; ++i) { - x[i] = 1. / (float) (*n); - } - *kase = 1; - jump = 1; - return 0; - } - - switch (jump) { - case 1: goto L20; - case 2: goto L40; - case 3: goto L70; - case 4: goto L110; - case 5: goto L140; - } - - /* ................ ENTRY (JUMP = 1) - FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X. */ - L20: - if (*n == 1) { - v[0] = x[0]; - *est = fabs(v[0]); - /* ... QUIT */ - goto L150; - } -#ifdef _CRAY - *est = SASUM(n, x, &c__1); -#else - *est = sasum_(n, x, &c__1); -#endif - - for (i = 0; i < *n; ++i) { - x[i] = d_sign(one, x[i]); - isgn[i] = i_dnnt(x[i]); - } - *kase = 2; - jump = 2; - return 0; - - /* ................ ENTRY (JUMP = 2) - FIRST ITERATION. X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. */ -L40: -#ifdef _CRAY - j = ISAMAX(n, &x[0], &c__1); -#else - j = isamax_(n, &x[0], &c__1); -#endif - --j; - iter = 2; - - /* MAIN LOOP - ITERATIONS 2,3,...,ITMAX. */ -L50: - for (i = 0; i < *n; ++i) x[i] = zero; - x[j] = one; - *kase = 1; - jump = 3; - return 0; - - /* ................ ENTRY (JUMP = 3) - X HAS BEEN OVERWRITTEN BY A*X. */ -L70: -#ifdef _CRAY - SCOPY(n, x, &c__1, v, &c__1); -#else - scopy_(n, x, &c__1, v, &c__1); -#endif - estold = *est; -#ifdef _CRAY - *est = SASUM(n, v, &c__1); -#else - *est = sasum_(n, v, &c__1); -#endif - - for (i = 0; i < *n; ++i) - if (i_dnnt(d_sign(one, x[i])) != isgn[i]) - goto L90; - - /* REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED. */ - goto L120; - -L90: - /* TEST FOR CYCLING. */ - if (*est <= estold) goto L120; - - for (i = 0; i < *n; ++i) { - x[i] = d_sign(one, x[i]); - isgn[i] = i_dnnt(x[i]); - } - *kase = 2; - jump = 4; - return 0; - - /* ................ ENTRY (JUMP = 4) - X HAS BEEN OVERWRITTEN BY TRANDPOSE(A)*X. */ -L110: - jlast = j; -#ifdef _CRAY - j = ISAMAX(n, &x[0], &c__1); -#else - j = isamax_(n, &x[0], &c__1); -#endif - --j; - if (x[jlast] != fabs(x[j]) && iter < 5) { - ++iter; - goto L50; - } - - /* ITERATION COMPLETE. FINAL STAGE. */ -L120: - altsgn = 1.; - for (i = 1; i <= *n; ++i) { - x[i-1] = altsgn * ((float)(i - 1) / (float)(*n - 1) + 1.); - altsgn = -altsgn; - } - *kase = 1; - jump = 5; - return 0; - - /* ................ ENTRY (JUMP = 5) - X HAS BEEN OVERWRITTEN BY A*X. */ -L140: -#ifdef _CRAY - temp = SASUM(n, x, &c__1) / (float)(*n * 3) * 2.; -#else - temp = sasum_(n, x, &c__1) / (float)(*n * 3) * 2.; -#endif - if (temp > *est) { -#ifdef _CRAY - SCOPY(n, &x[0], &c__1, &v[0], &c__1); -#else - scopy_(n, &x[0], &c__1, &v[0], &c__1); -#endif - *est = temp; - } - -L150: - *kase = 0; - return 0; - -} /* slacon_ */ diff --git a/thirdparty/superlu/SuperLU_4.1/SRC/zgsitrf.c.bak b/thirdparty/superlu/SuperLU_4.1/SRC/zgsitrf.c.bak deleted file mode 100644 index a763c98..0000000 --- a/thirdparty/superlu/SuperLU_4.1/SRC/zgsitrf.c.bak +++ /dev/null @@ -1,629 +0,0 @@ - -/*! @file zgsitf.c - * \brief Computes an ILU factorization of a general sparse matrix - * - * <pre> - * -- SuperLU routine (version 4.0) -- - * Lawrence Berkeley National Laboratory. - * June 30, 2009 - * </pre> - */ - -#include "slu_zdefs.h" - -#ifdef DEBUG -int num_drop_L; -#endif - -/*! \brief - * - * <pre> - * Purpose - * ======= - * - * ZGSITRF computes an ILU factorization of a general sparse m-by-n - * matrix A using partial pivoting with row interchanges. - * The factorization has the form - * Pr * A = L * U - * where Pr is a row permutation matrix, L is lower triangular with unit - * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper - * triangular (upper trapezoidal if A->nrow < A->ncol). - * - * See supermatrix.h for the definition of 'SuperMatrix' structure. - * - * Arguments - * ========= - * - * options (input) superlu_options_t* - * The structure defines the input parameters to control - * how the ILU decomposition will be performed. - * - * A (input) SuperMatrix* - * Original matrix A, permuted by columns, of dimension - * (A->nrow, A->ncol). The type of A can be: - * Stype = SLU_NCP; Dtype = SLU_Z; Mtype = SLU_GE. - * - * relax (input) int - * To control degree of relaxing supernodes. If the number - * of nodes (columns) in a subtree of the elimination tree is less - * than relax, this subtree is considered as one supernode, - * regardless of the row structures of those columns. - * - * panel_size (input) int - * A panel consists of at most panel_size consecutive columns. - * - * etree (input) int*, dimension (A->ncol) - * Elimination tree of A'*A. - * Note: etree is a vector of parent pointers for a forest whose - * vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol. - * On input, the columns of A should be permuted so that the - * etree is in a certain postorder. - * - * work (input/output) void*, size (lwork) (in bytes) - * User-supplied work space and space for the output data structures. - * Not referenced if lwork = 0; - * - * lwork (input) int - * Specifies the size of work array in bytes. - * = 0: allocate space internally by system malloc; - * > 0: use user-supplied work array of length lwork in bytes, - * returns error if space runs out. - * = -1: the routine guesses the amount of space needed without - * performing the factorization, and returns it in - * *info; no other side effects. - * - * perm_c (input) int*, dimension (A->ncol) - * Column permutation vector, which defines the - * permutation matrix Pc; perm_c[i] = j means column i of A is - * in position j in A*Pc. - * When searching for diagonal, perm_c[*] is applied to the - * row subscripts of A, so that diagonal threshold pivoting - * can find the diagonal of A, rather than that of A*Pc. - * - * perm_r (input/output) int*, dimension (A->nrow) - * Row permutation vector which defines the permutation matrix Pr, - * perm_r[i] = j means row i of A is in position j in Pr*A. - * If options->Fact = SamePattern_SameRowPerm, the pivoting routine - * will try to use the input perm_r, unless a certain threshold - * criterion is violated. In that case, perm_r is overwritten by - * a new permutation determined by partial pivoting or diagonal - * threshold pivoting. - * Otherwise, perm_r is output argument; - * - * L (output) SuperMatrix* - * The factor L from the factorization Pr*A=L*U; use compressed row - * subscripts storage for supernodes, i.e., L has type: - * Stype = SLU_SC, Dtype = SLU_Z, Mtype = SLU_TRLU. - * - * U (output) SuperMatrix* - * The factor U from the factorization Pr*A*Pc=L*U. Use column-wise - * storage scheme, i.e., U has types: Stype = SLU_NC, - * Dtype = SLU_Z, Mtype = SLU_TRU. - * - * stat (output) SuperLUStat_t* - * Record the statistics on runtime and floating-point operation count. - * See slu_util.h for the definition of 'SuperLUStat_t'. - * - * info (output) int* - * = 0: successful exit - * < 0: if info = -i, the i-th argument had an illegal value - * > 0: if info = i, and i is - * <= A->ncol: number of zero pivots. They are replaced by small - * entries according to options->ILU_FillTol. - * > A->ncol: number of bytes allocated when memory allocation - * failure occurred, plus A->ncol. If lwork = -1, it is - * the estimated amount of space needed, plus A->ncol. - * - * ====================================================================== - * - * Local Working Arrays: - * ====================== - * m = number of rows in the matrix - * n = number of columns in the matrix - * - * marker[0:3*m-1]: marker[i] = j means that node i has been - * reached when working on column j. - * Storage: relative to original row subscripts - * NOTE: There are 4 of them: - * marker/marker1 are used for panel dfs, see (ilu_)dpanel_dfs.c; - * marker2 is used for inner-factorization, see (ilu)_dcolumn_dfs.c; - * marker_relax(has its own space) is used for relaxed supernodes. - * - * parent[0:m-1]: parent vector used during dfs - * Storage: relative to new row subscripts - * - * xplore[0:m-1]: xplore[i] gives the location of the next (dfs) - * unexplored neighbor of i in lsub[*] - * - * segrep[0:nseg-1]: contains the list of supernodal representatives - * in topological order of the dfs. A supernode representative is the - * last column of a supernode. - * The maximum size of segrep[] is n. - * - * repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a - * supernodal representative r, repfnz[r] is the location of the first - * nonzero in this segment. It is also used during the dfs: repfnz[r]>0 - * indicates the supernode r has been explored. - * NOTE: There are W of them, each used for one column of a panel. - * - * panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below - * the panel diagonal. These are filled in during dpanel_dfs(), and are - * used later in the inner LU factorization within the panel. - * panel_lsub[]/dense[] pair forms the SPA data structure. - * NOTE: There are W of them. - * - * dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values; - * NOTE: there are W of them. - * - * tempv[0:*]: real temporary used for dense numeric kernels; - * The size of this array is defined by NUM_TEMPV() in slu_util.h. - * It is also used by the dropping routine ilu_ddrop_row(). - * </pre> - */ - -void -zgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size, - int *etree, void *work, int lwork, int *perm_c, int *perm_r, - SuperMatrix *L, SuperMatrix *U, SuperLUStat_t *stat, int *info) -{ - /* Local working arrays */ - NCPformat *Astore; - int *iperm_r = NULL; /* inverse of perm_r; used when - options->Fact == SamePattern_SameRowPerm */ - int *iperm_c; /* inverse of perm_c */ - int *swap, *iswap; /* swap is used to store the row permutation - during the factorization. Initially, it is set - to iperm_c (row indeces of Pc*A*Pc'). - iswap is the inverse of swap. After the - factorization, it is equal to perm_r. */ - int *iwork; - doublecomplex *zwork; - int *segrep, *repfnz, *parent, *xplore; - int *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */ - int *marker, *marker_relax; - doublecomplex *dense, *tempv; - double *dtempv; - int *relax_end, *relax_fsupc; - doublecomplex *a; - int *asub; - int *xa_begin, *xa_end; - int *xsup, *supno; - int *xlsub, *xlusup, *xusub; - int nzlumax; - double *amax; - doublecomplex drop_sum; - static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */ - int *iwork2; /* used by the second dropping rule */ - - /* Local scalars */ - fact_t fact = options->Fact; - double diag_pivot_thresh = options->DiagPivotThresh; - double drop_tol = options->ILU_DropTol; /* tau */ - double fill_ini = options->ILU_FillTol; /* tau^hat */ - double gamma = options->ILU_FillFactor; - int drop_rule = options->ILU_DropRule; - milu_t milu = options->ILU_MILU; - double fill_tol; - int pivrow; /* pivotal row number in the original matrix A */ - int nseg1; /* no of segments in U-column above panel row jcol */ - int nseg; /* no of segments in each U-column */ - register int jcol; - register int kcol; /* end column of a relaxed snode */ - register int icol; - register int i, k, jj, new_next, iinfo; - int m, n, min_mn, jsupno, fsupc, nextlu, nextu; - int w_def; /* upper bound on panel width */ - int usepr, iperm_r_allocated = 0; - int nnzL, nnzU; - int *panel_histo = stat->panel_histo; - flops_t *ops = stat->ops; - - int last_drop;/* the last column which the dropping rules applied */ - int quota; - int nnzAj; /* number of nonzeros in A(:,1:j) */ - int nnzLj, nnzUj; - double tol_L = drop_tol, tol_U = drop_tol; - doublecomplex zero = {0.0, 0.0}; - - /* Executable */ - iinfo = 0; - m = A->nrow; - n = A->ncol; - min_mn = SUPERLU_MIN(m, n); - Astore = A->Store; - a = Astore->nzval; - asub = Astore->rowind; - xa_begin = Astore->colbeg; - xa_end = Astore->colend; - - /* Allocate storage common to the factor routines */ - *info = zLUMemInit(fact, work, lwork, m, n, Astore->nnz, panel_size, - gamma, L, U, &Glu, &iwork, &zwork); - if ( *info ) return; - - xsup = Glu.xsup; - supno = Glu.supno; - xlsub = Glu.xlsub; - xlusup = Glu.xlusup; - xusub = Glu.xusub; - - SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore, - &repfnz, &panel_lsub, &marker_relax, &marker); - zSetRWork(m, panel_size, zwork, &dense, &tempv); - - usepr = (fact == SamePattern_SameRowPerm); - if ( usepr ) { - /* Compute the inverse of perm_r */ - iperm_r = (int *) intMalloc(m); - for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k; - iperm_r_allocated = 1; - } - - iperm_c = (int *) intMalloc(n); - for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k; - swap = (int *)intMalloc(n); - for (k = 0; k < n; k++) swap[k] = iperm_c[k]; - iswap = (int *)intMalloc(n); - for (k = 0; k < n; k++) iswap[k] = perm_c[k]; - amax = (double *) doubleMalloc(panel_size); - if (drop_rule & DROP_SECONDARY) - iwork2 = (int *)intMalloc(n); - else - iwork2 = NULL; - - nnzAj = 0; - nnzLj = 0; - nnzUj = 0; - last_drop = SUPERLU_MAX(min_mn - 2 * sp_ienv(7), (int)(min_mn * 0.95)); - - /* Identify relaxed snodes */ - relax_end = (int *) intMalloc(n); - relax_fsupc = (int *) intMalloc(n); - if ( options->SymmetricMode == YES ) - ilu_heap_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc); - else - ilu_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc); - - ifill (perm_r, m, EMPTY); - ifill (marker, m * NO_MARKER, EMPTY); - supno[0] = -1; - xsup[0] = xlsub[0] = xusub[0] = xlusup[0] = 0; - w_def = panel_size; - - /* Mark the rows used by relaxed supernodes */ - ifill (marker_relax, m, EMPTY); - i = mark_relax(m, relax_end, relax_fsupc, xa_begin, xa_end, - asub, marker_relax); -#if ( PRNTlevel >= 1) - printf("%d relaxed supernodes.\n", i); -#endif - - /* - * Work on one "panel" at a time. A panel is one of the following: - * (a) a relaxed supernode at the bottom of the etree, or - * (b) panel_size contiguous columns, defined by the user - */ - for (jcol = 0; jcol < min_mn; ) { - - if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */ - kcol = relax_end[jcol]; /* end of the relaxed snode */ - panel_histo[kcol-jcol+1]++; - - /* Drop small rows in the previous supernode. */ - if (jcol > 0 && jcol < last_drop) { - int first = xsup[supno[jcol - 1]]; - int last = jcol - 1; - int quota; - - /* Compute the quota */ - if (drop_rule & DROP_PROWS) - quota = gamma * Astore->nnz / m * (m - first) / m - * (last - first + 1); - else if (drop_rule & DROP_COLUMN) { - int i; - quota = 0; - for (i = first; i <= last; i++) - quota += xa_end[i] - xa_begin[i]; - quota = gamma * quota * (m - first) / m; - } else if (drop_rule & DROP_AREA) - quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m) - - nnzLj; - else - quota = m * n; - fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / min_mn); - - /* Drop small rows */ - dtempv = (double *) tempv; - i = ilu_zdrop_row(options, first, last, tol_L, quota, &nnzLj, - &fill_tol, &Glu, dtempv, iwork2, 0); - /* Reset the parameters */ - if (drop_rule & DROP_DYNAMIC) { - if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m) - < nnzLj) - tol_L = SUPERLU_MIN(1.0, tol_L * 2.0); - else - tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5); - } - if (fill_tol < 0) iinfo -= (int)fill_tol; -#ifdef DEBUG - num_drop_L += i * (last - first + 1); -#endif - } - - /* -------------------------------------- - * Factorize the relaxed supernode(jcol:kcol) - * -------------------------------------- */ - /* Determine the union of the row structure of the snode */ - if ( (*info = ilu_zsnode_dfs(jcol, kcol, asub, xa_begin, xa_end, - marker, &Glu)) != 0 ) - return; - - nextu = xusub[jcol]; - nextlu = xlusup[jcol]; - jsupno = supno[jcol]; - fsupc = xsup[jsupno]; - new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1); - nzlumax = Glu.nzlumax; - while ( new_next > nzlumax ) { - if ((*info = zLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu))) - return; - } - - for (icol = jcol; icol <= kcol; icol++) { - xusub[icol+1] = nextu; - - amax[0] = 0.0; - /* Scatter into SPA dense[*] */ - for (k = xa_begin[icol]; k < xa_end[icol]; k++) { - register double tmp = z_abs1 (&a[k]); - if (tmp > amax[0]) amax[0] = tmp; - dense[asub[k]] = a[k]; - } - nnzAj += xa_end[icol] - xa_begin[icol]; - if (amax[0] == 0.0) { - amax[0] = fill_ini; -#if ( PRNTlevel >= 1) - printf("Column %d is entirely zero!\n", icol); - fflush(stdout); -#endif - } - - /* Numeric update within the snode */ - zsnode_bmod(icol, jsupno, fsupc, dense, tempv, &Glu, stat); - - if (usepr) pivrow = iperm_r[icol]; - fill_tol = pow(fill_ini, 1.0 - (double)icol / (double)min_mn); - if ( (*info = ilu_zpivotL(icol, diag_pivot_thresh, &usepr, - perm_r, iperm_c[icol], swap, iswap, - marker_relax, &pivrow, - amax[0] * fill_tol, milu, zero, - &Glu, stat)) ) { - iinfo++; - marker[pivrow] = kcol; - } - - } - - jcol = kcol + 1; - - } else { /* Work on one panel of panel_size columns */ - - /* Adjust panel_size so that a panel won't overlap with the next - * relaxed snode. - */ - panel_size = w_def; - for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++) - if ( relax_end[k] != EMPTY ) { - panel_size = k - jcol; - break; - } - if ( k == min_mn ) panel_size = min_mn - jcol; - panel_histo[panel_size]++; - - /* symbolic factor on a panel of columns */ - ilu_zpanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1, - dense, amax, panel_lsub, segrep, repfnz, - marker, parent, xplore, &Glu); - - /* numeric sup-panel updates in topological order */ - zpanel_bmod(m, panel_size, jcol, nseg1, dense, - tempv, segrep, repfnz, &Glu, stat); - - /* Sparse LU within the panel, and below panel diagonal */ - for (jj = jcol; jj < jcol + panel_size; jj++) { - - k = (jj - jcol) * m; /* column index for w-wide arrays */ - - nseg = nseg1; /* Begin after all the panel segments */ - - nnzAj += xa_end[jj] - xa_begin[jj]; - - if ((*info = ilu_zcolumn_dfs(m, jj, perm_r, &nseg, - &panel_lsub[k], segrep, &repfnz[k], - marker, parent, xplore, &Glu))) - return; - - /* Numeric updates */ - if ((*info = zcolumn_bmod(jj, (nseg - nseg1), &dense[k], - tempv, &segrep[nseg1], &repfnz[k], - jcol, &Glu, stat)) != 0) return; - - /* Make a fill-in position if the column is entirely zero */ - if (xlsub[jj + 1] == xlsub[jj]) { - register int i, row; - int nextl; - int nzlmax = Glu.nzlmax; - int *lsub = Glu.lsub; - int *marker2 = marker + 2 * m; - - /* Allocate memory */ - nextl = xlsub[jj] + 1; - if (nextl >= nzlmax) { - int error = zLUMemXpand(jj, nextl, LSUB, &nzlmax, &Glu); - if (error) { *info = error; return; } - lsub = Glu.lsub; - } - xlsub[jj + 1]++; - assert(xlusup[jj]==xlusup[jj+1]); - xlusup[jj + 1]++; - Glu.lusup[xlusup[jj]] = zero; - - /* Choose a row index (pivrow) for fill-in */ - for (i = jj; i < n; i++) - if (marker_relax[swap[i]] <= jj) break; - row = swap[i]; - marker2[row] = jj; - lsub[xlsub[jj]] = row; -#ifdef DEBUG - printf("Fill col %d.\n", jj); - fflush(stdout); -#endif - } - - /* Computer the quota */ - if (drop_rule & DROP_PROWS) - quota = gamma * Astore->nnz / m * jj / m; - else if (drop_rule & DROP_COLUMN) - quota = gamma * (xa_end[jj] - xa_begin[jj]) * - (jj + 1) / m; - else if (drop_rule & DROP_AREA) - quota = gamma * 0.9 * nnzAj * 0.5 - nnzUj; - else - quota = m; - - /* Copy the U-segments to ucol[*] and drop small entries */ - if ((*info = ilu_zcopy_to_ucol(jj, nseg, segrep, &repfnz[k], - perm_r, &dense[k], drop_rule, - milu, amax[jj - jcol] * tol_U, - quota, &drop_sum, &nnzUj, &Glu, - iwork2)) != 0) - return; - - /* Reset the dropping threshold if required */ - if (drop_rule & DROP_DYNAMIC) { - if (gamma * 0.9 * nnzAj * 0.5 < nnzLj) - tol_U = SUPERLU_MIN(1.0, tol_U * 2.0); - else - tol_U = SUPERLU_MAX(drop_tol, tol_U * 0.5); - } - - zd_mult(&drop_sum, &drop_sum, MILU_ALPHA); - if (usepr) pivrow = iperm_r[jj]; - fill_tol = pow(fill_ini, 1.0 - (double)jj / (double)min_mn); - if ( (*info = ilu_zpivotL(jj, diag_pivot_thresh, &usepr, perm_r, - iperm_c[jj], swap, iswap, - marker_relax, &pivrow, - amax[jj - jcol] * fill_tol, milu, - drop_sum, &Glu, stat)) ) { - iinfo++; - marker[m + pivrow] = jj; - marker[2 * m + pivrow] = jj; - } - - /* Reset repfnz[] for this column */ - resetrep_col (nseg, segrep, &repfnz[k]); - - /* Start a new supernode, drop the previous one */ - if (jj > 0 && supno[jj] > supno[jj - 1] && jj < last_drop) { - int first = xsup[supno[jj - 1]]; - int last = jj - 1; - int quota; - - /* Compute the quota */ - if (drop_rule & DROP_PROWS) - quota = gamma * Astore->nnz / m * (m - first) / m - * (last - first + 1); - else if (drop_rule & DROP_COLUMN) { - int i; - quota = 0; - for (i = first; i <= last; i++) - quota += xa_end[i] - xa_begin[i]; - quota = gamma * quota * (m - first) / m; - } else if (drop_rule & DROP_AREA) - quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) - / m) - nnzLj; - else - quota = m * n; - fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / - (double)min_mn); - - /* Drop small rows */ - dtempv = (double *) tempv; - i = ilu_zdrop_row(options, first, last, tol_L, quota, - &nnzLj, &fill_tol, &Glu, dtempv, iwork2, - 1); - - /* Reset the parameters */ - if (drop_rule & DROP_DYNAMIC) { - if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m) - < nnzLj) - tol_L = SUPERLU_MIN(1.0, tol_L * 2.0); - else - tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5); - } - if (fill_tol < 0) iinfo -= (int)fill_tol; -#ifdef DEBUG - num_drop_L += i * (last - first + 1); -#endif - } /* if start a new supernode */ - - } /* for */ - - jcol += panel_size; /* Move to the next panel */ - - } /* else */ - - } /* for */ - - *info = iinfo; - - if ( m > n ) { - k = 0; - for (i = 0; i < m; ++i) - if ( perm_r[i] == EMPTY ) { - perm_r[i] = n + k; - ++k; - } - } - - ilu_countnz(min_mn, &nnzL, &nnzU, &Glu); - fixupL(min_mn, perm_r, &Glu); - - zLUWorkFree(iwork, zwork, &Glu); /* Free work space and compress storage */ - - if ( fact == SamePattern_SameRowPerm ) { - /* L and U structures may have changed due to possibly different - pivoting, even though the storage is available. - There could also be memory expansions, so the array locations - may have changed, */ - ((SCformat *)L->Store)->nnz = nnzL; - ((SCformat *)L->Store)->nsuper = Glu.supno[n]; - ((SCformat *)L->Store)->nzval = Glu.lusup; - ((SCformat *)L->Store)->nzval_colptr = Glu.xlusup; - ((SCformat *)L->Store)->rowind = Glu.lsub; - ((SCformat *)L->Store)->rowind_colptr = Glu.xlsub; - ((NCformat *)U->Store)->nnz = nnzU; - ((NCformat *)U->Store)->nzval = Glu.ucol; - ((NCformat *)U->Store)->rowind = Glu.usub; - ((NCformat *)U->Store)->colptr = Glu.xusub; - } else { - zCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup, - Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno, - Glu.xsup, SLU_SC, SLU_Z, SLU_TRLU); - zCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol, - Glu.usub, Glu.xusub, SLU_NC, SLU_Z, SLU_TRU); - } - - ops[FACT] += ops[TRSV] + ops[GEMV]; - stat->expansions = --(Glu.num_expansions); - - if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r); - SUPERLU_FREE (iperm_c); - SUPERLU_FREE (relax_end); - SUPERLU_FREE (swap); - SUPERLU_FREE (iswap); - SUPERLU_FREE (relax_fsupc); - SUPERLU_FREE (amax); - if ( iwork2 ) SUPERLU_FREE (iwork2); - -} diff --git a/thirdparty/superlu/SuperLU_4.1/SRC/zlacon.c.bak b/thirdparty/superlu/SuperLU_4.1/SRC/zlacon.c.bak deleted file mode 100644 index b2cd1ed..0000000 --- a/thirdparty/superlu/SuperLU_4.1/SRC/zlacon.c.bak +++ /dev/null @@ -1,221 +0,0 @@ - -/*! @file zlacon.c - * \brief Estimates the 1-norm - * - * <pre> - * -- SuperLU routine (version 2.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * November 15, 1997 - * </pre> - */ -#include <math.h> -#include "slu_Cnames.h" -#include "slu_dcomplex.h" - -/*! \brief - * - * <pre> - * Purpose - * ======= - * - * ZLACON estimates the 1-norm of a square matrix A. - * Reverse communication is used for evaluating matrix-vector products. - * - * - * Arguments - * ========= - * - * N (input) INT - * The order of the matrix. N >= 1. - * - * V (workspace) DOUBLE COMPLEX PRECISION array, dimension (N) - * On the final return, V = A*W, where EST = norm(V)/norm(W) - * (W is not returned). - * - * X (input/output) DOUBLE COMPLEX PRECISION array, dimension (N) - * On an intermediate return, X should be overwritten by - * A * X, if KASE=1, - * A' * X, if KASE=2, - * where A' is the conjugate transpose of A, - * and ZLACON must be re-called with all the other parameters - * unchanged. - * - * - * EST (output) DOUBLE PRECISION - * An estimate (a lower bound) for norm(A). - * - * KASE (input/output) INT - * On the initial call to ZLACON, KASE should be 0. - * On an intermediate return, KASE will be 1 or 2, indicating - * whether X should be overwritten by A * X or A' * X. - * On the final return from ZLACON, KASE will again be 0. - * - * Further Details - * ======= ======= - * - * Contributed by Nick Higham, University of Manchester. - * Originally named CONEST, dated March 16, 1988. - * - * Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of - * a real or complex matrix, with applications to condition estimation", - * ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. - * ===================================================================== - * </pre> - */ - -int -zlacon_(int *n, doublecomplex *v, doublecomplex *x, double *est, int *kase) - -{ - - - /* Table of constant values */ - int c__1 = 1; - doublecomplex zero = {0.0, 0.0}; - doublecomplex one = {1.0, 0.0}; - - /* System generated locals */ - double d__1; - - /* Local variables */ - static int iter; - static int jump, jlast; - static double altsgn, estold; - static int i, j; - double temp; - double safmin; - extern double dlamch_(char *); - extern int izmax1_(int *, doublecomplex *, int *); - extern double dzsum1_(int *, doublecomplex *, int *); - - safmin = dlamch_("Safe minimum"); - if ( *kase == 0 ) { - for (i = 0; i < *n; ++i) { - x[i].r = 1. / (double) (*n); - x[i].i = 0.; - } - *kase = 1; - jump = 1; - return 0; - } - - switch (jump) { - case 1: goto L20; - case 2: goto L40; - case 3: goto L70; - case 4: goto L110; - case 5: goto L140; - } - - /* ................ ENTRY (JUMP = 1) - FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X. */ - L20: - if (*n == 1) { - v[0] = x[0]; - *est = z_abs(&v[0]); - /* ... QUIT */ - goto L150; - } - *est = dzsum1_(n, x, &c__1); - - for (i = 0; i < *n; ++i) { - d__1 = z_abs(&x[i]); - if (d__1 > safmin) { - d__1 = 1 / d__1; - x[i].r *= d__1; - x[i].i *= d__1; - } else { - x[i] = one; - } - } - *kase = 2; - jump = 2; - return 0; - - /* ................ ENTRY (JUMP = 2) - FIRST ITERATION. X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. */ -L40: - j = izmax1_(n, &x[0], &c__1); - --j; - iter = 2; - - /* MAIN LOOP - ITERATIONS 2,3,...,ITMAX. */ -L50: - for (i = 0; i < *n; ++i) x[i] = zero; - x[j] = one; - *kase = 1; - jump = 3; - return 0; - - /* ................ ENTRY (JUMP = 3) - X HAS BEEN OVERWRITTEN BY A*X. */ -L70: -#ifdef _CRAY - CCOPY(n, x, &c__1, v, &c__1); -#else - zcopy_(n, x, &c__1, v, &c__1); -#endif - estold = *est; - *est = dzsum1_(n, v, &c__1); - - -L90: - /* TEST FOR CYCLING. */ - if (*est <= estold) goto L120; - - for (i = 0; i < *n; ++i) { - d__1 = z_abs(&x[i]); - if (d__1 > safmin) { - d__1 = 1 / d__1; - x[i].r *= d__1; - x[i].i *= d__1; - } else { - x[i] = one; - } - } - *kase = 2; - jump = 4; - return 0; - - /* ................ ENTRY (JUMP = 4) - X HAS BEEN OVERWRITTEN BY TRANDPOSE(A)*X. */ -L110: - jlast = j; - j = izmax1_(n, &x[0], &c__1); - --j; - if (x[jlast].r != (d__1 = x[j].r, fabs(d__1)) && iter < 5) { - ++iter; - goto L50; - } - - /* ITERATION COMPLETE. FINAL STAGE. */ -L120: - altsgn = 1.; - for (i = 1; i <= *n; ++i) { - x[i-1].r = altsgn * ((double)(i - 1) / (double)(*n - 1) + 1.); - x[i-1].i = 0.; - altsgn = -altsgn; - } - *kase = 1; - jump = 5; - return 0; - - /* ................ ENTRY (JUMP = 5) - X HAS BEEN OVERWRITTEN BY A*X. */ -L140: - temp = dzsum1_(n, x, &c__1) / (double)(*n * 3) * 2.; - if (temp > *est) { -#ifdef _CRAY - CCOPY(n, &x[0], &c__1, &v[0], &c__1); -#else - zcopy_(n, &x[0], &c__1, &v[0], &c__1); -#endif - *est = temp; - } - -L150: - *kase = 0; - return 0; - -} /* zlacon_ */ diff --git a/thirdparty/superlu/SuperLU_4.1/TESTING/MATGEN/Cnames.h.bak b/thirdparty/superlu/SuperLU_4.1/TESTING/MATGEN/Cnames.h.bak deleted file mode 100644 index 28eecfa..0000000 --- a/thirdparty/superlu/SuperLU_4.1/TESTING/MATGEN/Cnames.h.bak +++ /dev/null @@ -1,197 +0,0 @@ -/* - * -- SuperLU routine (version 2.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * November 1, 1997 - * - */ -#ifndef __SUPERLU_CNAMES /* allow multiple inclusions */ -#define __SUPERLU_CNAMES - -/* - * These macros define how C routines will be called. ADD_ assumes that - * they will be called by fortran, which expects C routines to have an - * underscore postfixed to the name (Suns, and the Intel expect this). - * NOCHANGE indicates that fortran will be calling, and that it expects - * the name called by fortran to be identical to that compiled by the C - * (RS6K's do this). UPCASE says it expects C routines called by fortran - * to be in all upcase (CRAY wants this). - */ - -#define ADD_ 0 -#define NOCHANGE 1 -#define UPCASE 2 -#define C_CALL 3 - -#ifdef UpCase -#define F77_CALL_C UPCASE -#endif - -#ifdef NoChange -#define F77_CALL_C NOCHANGE -#endif - -#ifdef Add_ -#define F77_CALL_C ADD_ -#endif - -#ifndef F77_CALL_C -#define F77_CALL_C ADD_ -#endif - -#if (F77_CALL_C == ADD_) -/* - * These defines set up the naming scheme required to have a fortran 77 - * routine call a C routine - * No redefinition necessary to have following Fortran to C interface: - * FORTRAN CALL C DECLARATION - * call dgemm(...) void dgemm_(...) - * - * This is the default. - */ - -#endif - -#if (F77_CALL_C == UPCASE) -/* - * These defines set up the naming scheme required to have a fortran 77 - * routine call a C routine - * following Fortran to C interface: - * FORTRAN CALL C DECLARATION - * call dgemm(...) void DGEMM(...) - */ -#define sasum_ SASUM -#define isamax_ ISAMAX -#define scopy_ SCOPY -#define sscal_ SSCAL -#define sger_ SGER -#define snrm2_ SNRM2 -#define ssymv_ SSYMV -#define sdot_ SDOT -#define saxpy_ SAXPY -#define ssyr2_ SSYR2 -#define srot_ SROT -#define sgemv_ SGEMV -#define strsv_ STRSV -#define sgemm_ SGEMM -#define strsm_ STRSM - -#define dasum_ SASUM -#define idamax_ ISAMAX -#define dcopy_ SCOPY -#define dscal_ SSCAL -#define dger_ SGER -#define dnrm2_ SNRM2 -#define dsymv_ SSYMV -#define ddot_ SDOT -#define daxpy_ SAXPY -#define dsyr2_ SSYR2 -#define drot_ SROT -#define dgemv_ SGEMV -#define dtrsv_ STRSV -#define dgemm_ SGEMM -#define dtrsm_ STRSM - -#define scasum_ SCASUM -#define icamax_ ICAMAX -#define ccopy_ CCOPY -#define cscal_ CSCAL -#define scnrm2_ SCNRM2 -#define caxpy_ CAXPY -#define cgemv_ CGEMV -#define ctrsv_ CTRSV -#define cgemm_ CGEMM -#define ctrsm_ CTRSM -#define cgerc_ CGERC -#define chemv_ CHEMV -#define cher2_ CHER2 - -#define dzasum_ SCASUM -#define izamax_ ICAMAX -#define zcopy_ CCOPY -#define zscal_ CSCAL -#define dznrm2_ SCNRM2 -#define zaxpy_ CAXPY -#define zgemv_ CGEMV -#define ztrsv_ CTRSV -#define zgemm_ CGEMM -#define ztrsm_ CTRSM -#define zgerc_ CGERC -#define zhemv_ CHEMV -#define zher2_ CHER2 - -#define c_bridge_dgssv_ C_BRIDGE_DGSSV -#endif - -#if (F77_CALL_C == NOCHANGE) -/* - * These defines set up the naming scheme required to have a fortran 77 - * routine call a C routine - * for following Fortran to C interface: - * FORTRAN CALL C DECLARATION - * call dgemm(...) void dgemm(...) - */ -#define sasum_ sasum -#define isamax_ isamax -#define scopy_ scopy -#define sscal_ sscal -#define sger_ sger -#define snrm2_ snrm2 -#define ssymv_ ssymv -#define sdot_ sdot -#define saxpy_ saxpy -#define ssyr2_ ssyr2 -#define srot_ srot -#define sgemv_ sgemv -#define strsv_ strsv -#define sgemm_ sgemm -#define strsm_ strsm - -#define dasum_ dasum -#define idamax_ idamax -#define dcopy_ dcopy -#define dscal_ dscal -#define dger_ dger -#define dnrm2_ dnrm2 -#define dsymv_ dsymv -#define ddot_ ddot -#define daxpy_ daxpy -#define dsyr2_ dsyr2 -#define drot_ drot -#define dgemv_ dgemv -#define dtrsv_ dtrsv -#define dgemm_ dgemm -#define dtrsm_ dtrsm - -#define scasum_ scasum -#define icamax_ icamax -#define ccopy_ ccopy -#define cscal_ cscal -#define scnrm2_ scnrm2 -#define caxpy_ caxpy -#define cgemv_ cgemv -#define ctrsv_ ctrsv -#define cgemm_ cgemm -#define ctrsm_ ctrsm -#define cgerc_ cgerc -#define chemv_ chemv -#define cher2_ cher2 - -#define dzasum_ dzasum -#define izamax_ izamax -#define zcopy_ zcopy -#define zscal_ zscal -#define dznrm2_ dznrm2 -#define zaxpy_ zaxpy -#define zgemv_ zgemv -#define ztrsv_ ztrsv -#define zgemm_ zgemm -#define ztrsm_ ztrsm -#define zgerc_ zgerc -#define zhemv_ zhemv -#define zher2_ zher2 - -#define c_bridge_dgssv_ c_bridge_dgssv -#endif - -#endif /* __SUPERLU_CNAMES */