diff --git a/thirdparty/superlu/SuperLU_4.1/FORTRAN/c_fortran_dgssv.c.bak b/thirdparty/superlu/SuperLU_4.1/FORTRAN/c_fortran_dgssv.c.bak
deleted file mode 100644
index 284abef..0000000
--- a/thirdparty/superlu/SuperLU_4.1/FORTRAN/c_fortran_dgssv.c.bak
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-
-#include "dsp_defs.h"
-
-#define HANDLE_SIZE  8
-/* kind of integer to hold a pointer.  Use int.
-   This might need to be changed on 64-bit systems. */
-typedef int fptr;  /* 32-bit by default */
-
-typedef struct {
-    SuperMatrix *L;
-    SuperMatrix *U;
-    int *perm_c;
-    int *perm_r;
-} factors_t;
-
-void
-c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
-		 int *rowind, int *colptr, double *b, int *ldb,
-		 fptr *f_factors, /* a handle containing the address
-				     pointing to the factored matrices */
-		 int *info)
-
-{
-/* 
- * This routine can be called from Fortran.
- *
- * iopt (input) int
- *      Specifies the operation:
- *      = 1, performs LU decomposition for the first time
- *      = 2, performs triangular solve
- *      = 3, free all the storage in the end
- *
- * f_factors (input/output) fptr* 
- *      If iopt == 1, it is an output and contains the pointer pointing to
- *                    the structure of the factored matrices.
- *      Otherwise, it it an input.
- *
- */
- 
-    SuperMatrix A, AC, B;
-    SuperMatrix *L, *U;
-    int *perm_r; /* row permutations from partial pivoting */
-    int *perm_c; /* column permutation vector */
-    int *etree;  /* column elimination tree */
-    SCformat *Lstore;
-    NCformat *Ustore;
-    int      i, panel_size, permc_spec, relax;
-    trans_t  trans;
-    double   drop_tol = 0.0;
-    mem_usage_t   mem_usage;
-    superlu_options_t options;
-    SuperLUStat_t stat;
-    factors_t *LUfactors;
-
-    trans = NOTRANS;
-
-    if ( *iopt == 1 ) { /* LU decomposition */
-
-        /* Set the default input options. */
-        set_default_options(&options);
-
-	/* Initialize the statistics variables. */
-	StatInit(&stat);
-
-	/* Adjust to 0-based indexing */
-	for (i = 0; i < *nnz; ++i) --rowind[i];
-	for (i = 0; i <= *n; ++i) --colptr[i];
-
-	dCreate_CompCol_Matrix(&A, *n, *n, *nnz, values, rowind, colptr,
-			       SLU_NC, SLU_D, SLU_GE);
-	L = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
-	U = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
-	if ( !(perm_r = intMalloc(*n)) ) ABORT("Malloc fails for perm_r[].");
-	if ( !(perm_c = intMalloc(*n)) ) ABORT("Malloc fails for perm_c[].");
-	if ( !(etree = intMalloc(*n)) ) ABORT("Malloc fails for etree[].");
-
-	/*
-	 * Get column permutation vector perm_c[], according to permc_spec:
-	 *   permc_spec = 0: natural ordering 
-	 *   permc_spec = 1: minimum degree on structure of A'*A
-	 *   permc_spec = 2: minimum degree on structure of A'+A
-	 *   permc_spec = 3: approximate minimum degree for unsymmetric matrices
-	 */    	
-	permc_spec = options.ColPerm;
-	get_perm_c(permc_spec, &A, perm_c);
-	
-	sp_preorder(&options, &A, perm_c, etree, &AC);
-
-	panel_size = sp_ienv(1);
-	relax = sp_ienv(2);
-
-	dgstrf(&options, &AC, drop_tol, relax, panel_size, 
-	       etree, NULL, 0, perm_c, perm_r, L, U, &stat, info);
-
-	if ( *info == 0 ) {
-	    Lstore = (SCformat *) L->Store;
-	    Ustore = (NCformat *) U->Store;
-	    printf("No of nonzeros in factor L = %d\n", Lstore->nnz);
-	    printf("No of nonzeros in factor U = %d\n", Ustore->nnz);
-	    printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz);
-	    dQuerySpace(L, U, &mem_usage);
-	    printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
-		   mem_usage.for_lu/1e6, mem_usage.total_needed/1e6,
-		   mem_usage.expansions);
-	} else {
-	    printf("dgstrf() error returns INFO= %d\n", *info);
-	    if ( *info <= *n ) { /* factorization completes */
-		dQuerySpace(L, U, &mem_usage);
-		printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
-		       mem_usage.for_lu/1e6, mem_usage.total_needed/1e6,
-		       mem_usage.expansions);
-	    }
-	}
-	
-	/* Restore to 1-based indexing */
-	for (i = 0; i < *nnz; ++i) ++rowind[i];
-	for (i = 0; i <= *n; ++i) ++colptr[i];
-
-	/* Save the LU factors in the factors handle */
-	LUfactors = (factors_t*) SUPERLU_MALLOC(sizeof(factors_t));
-	LUfactors->L = L;
-	LUfactors->U = U;
-	LUfactors->perm_c = perm_c;
-	LUfactors->perm_r = perm_r;
-	*f_factors = (fptr) LUfactors;
-
-	/* Free un-wanted storage */
-	SUPERLU_FREE(etree);
-	Destroy_SuperMatrix_Store(&A);
-	Destroy_CompCol_Permuted(&AC);
-	StatFree(&stat);
-
-    } else if ( *iopt == 2 ) { /* Triangular solve */
-	/* Initialize the statistics variables. */
-	StatInit(&stat);
-
-	/* Extract the LU factors in the factors handle */
-	LUfactors = (factors_t*) *f_factors;
-	L = LUfactors->L;
-	U = LUfactors->U;
-	perm_c = LUfactors->perm_c;
-	perm_r = LUfactors->perm_r;
-
-	dCreate_Dense_Matrix(&B, *n, *nrhs, b, *ldb, SLU_DN, SLU_D, SLU_GE);
-
-        /* Solve the system A*X=B, overwriting B with X. */
-        dgstrs (trans, L, U, perm_c, perm_r, &B, &stat, info);
-
-	Destroy_SuperMatrix_Store(&B);
-	StatFree(&stat);
-
-    } else if ( *iopt == 3 ) { /* Free storage */
-	/* Free the LU factors in the factors handle */
-	LUfactors = (factors_t*) *f_factors;
-	SUPERLU_FREE (LUfactors->perm_r);
-	SUPERLU_FREE (LUfactors->perm_c);
-	Destroy_SuperNode_Matrix(LUfactors->L);
-	Destroy_CompCol_Matrix(LUfactors->U);
-        SUPERLU_FREE (LUfactors->L);
-        SUPERLU_FREE (LUfactors->U);
-	SUPERLU_FREE (LUfactors);
-    } else {
-	fprintf(stderr,"Invalid iopt=%d passed to c_fortran_dgssv()\n",*iopt);
-	exit(-1);
-    }
-}
-
-
diff --git a/thirdparty/superlu/SuperLU_4.1/SRC/cgsitrf.c.bak b/thirdparty/superlu/SuperLU_4.1/SRC/cgsitrf.c.bak
deleted file mode 100644
index 03474b1..0000000
--- a/thirdparty/superlu/SuperLU_4.1/SRC/cgsitrf.c.bak
+++ /dev/null
@@ -1,629 +0,0 @@
-
-/*! @file cgsitf.c
- * \brief Computes an ILU factorization of a general sparse matrix
- *
- * <pre>
- * -- SuperLU routine (version 4.0) --
- * Lawrence Berkeley National Laboratory.
- * June 30, 2009
- * </pre>
- */
-
-#include "slu_cdefs.h"
-
-#ifdef DEBUG
-int num_drop_L;
-#endif
-
-/*! \brief
- *
- * <pre>
- * Purpose
- * =======
- *
- * CGSITRF computes an ILU factorization of a general sparse m-by-n
- * matrix A using partial pivoting with row interchanges.
- * The factorization has the form
- *     Pr * A = L * U
- * where Pr is a row permutation matrix, L is lower triangular with unit
- * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper
- * triangular (upper trapezoidal if A->nrow < A->ncol).
- *
- * See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * options (input) superlu_options_t*
- *	   The structure defines the input parameters to control
- *	   how the ILU decomposition will be performed.
- *
- * A	    (input) SuperMatrix*
- *	    Original matrix A, permuted by columns, of dimension
- *	    (A->nrow, A->ncol). The type of A can be:
- *	    Stype = SLU_NCP; Dtype = SLU_C; Mtype = SLU_GE.
- *
- * relax    (input) int
- *	    To control degree of relaxing supernodes. If the number
- *	    of nodes (columns) in a subtree of the elimination tree is less
- *	    than relax, this subtree is considered as one supernode,
- *	    regardless of the row structures of those columns.
- *
- * panel_size (input) int
- *	    A panel consists of at most panel_size consecutive columns.
- *
- * etree    (input) int*, dimension (A->ncol)
- *	    Elimination tree of A'*A.
- *	    Note: etree is a vector of parent pointers for a forest whose
- *	    vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
- *	    On input, the columns of A should be permuted so that the
- *	    etree is in a certain postorder.
- *
- * work     (input/output) void*, size (lwork) (in bytes)
- *	    User-supplied work space and space for the output data structures.
- *	    Not referenced if lwork = 0;
- *
- * lwork   (input) int
- *	   Specifies the size of work array in bytes.
- *	   = 0:  allocate space internally by system malloc;
- *	   > 0:  use user-supplied work array of length lwork in bytes,
- *		 returns error if space runs out.
- *	   = -1: the routine guesses the amount of space needed without
- *		 performing the factorization, and returns it in
- *		 *info; no other side effects.
- *
- * perm_c   (input) int*, dimension (A->ncol)
- *	    Column permutation vector, which defines the
- *	    permutation matrix Pc; perm_c[i] = j means column i of A is
- *	    in position j in A*Pc.
- *	    When searching for diagonal, perm_c[*] is applied to the
- *	    row subscripts of A, so that diagonal threshold pivoting
- *	    can find the diagonal of A, rather than that of A*Pc.
- *
- * perm_r   (input/output) int*, dimension (A->nrow)
- *	    Row permutation vector which defines the permutation matrix Pr,
- *	    perm_r[i] = j means row i of A is in position j in Pr*A.
- *	    If options->Fact = SamePattern_SameRowPerm, the pivoting routine
- *	       will try to use the input perm_r, unless a certain threshold
- *	       criterion is violated. In that case, perm_r is overwritten by
- *	       a new permutation determined by partial pivoting or diagonal
- *	       threshold pivoting.
- *	    Otherwise, perm_r is output argument;
- *
- * L	    (output) SuperMatrix*
- *	    The factor L from the factorization Pr*A=L*U; use compressed row
- *	    subscripts storage for supernodes, i.e., L has type:
- *	    Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
- *
- * U	    (output) SuperMatrix*
- *	    The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
- *	    storage scheme, i.e., U has types: Stype = SLU_NC,
- *	    Dtype = SLU_C, Mtype = SLU_TRU.
- *
- * stat     (output) SuperLUStat_t*
- *	    Record the statistics on runtime and floating-point operation count.
- *	    See slu_util.h for the definition of 'SuperLUStat_t'.
- *
- * info     (output) int*
- *	    = 0: successful exit
- *	    < 0: if info = -i, the i-th argument had an illegal value
- *	    > 0: if info = i, and i is
- *	       <= A->ncol: number of zero pivots. They are replaced by small
- *		  entries according to options->ILU_FillTol.
- *	       > A->ncol: number of bytes allocated when memory allocation
- *		  failure occurred, plus A->ncol. If lwork = -1, it is
- *		  the estimated amount of space needed, plus A->ncol.
- *
- * ======================================================================
- *
- * Local Working Arrays:
- * ======================
- *   m = number of rows in the matrix
- *   n = number of columns in the matrix
- *
- *   marker[0:3*m-1]: marker[i] = j means that node i has been
- *	reached when working on column j.
- *	Storage: relative to original row subscripts
- *	NOTE: There are 4 of them:
- *	      marker/marker1 are used for panel dfs, see (ilu_)dpanel_dfs.c;
- *	      marker2 is used for inner-factorization, see (ilu)_dcolumn_dfs.c;
- *	      marker_relax(has its own space) is used for relaxed supernodes.
- *
- *   parent[0:m-1]: parent vector used during dfs
- *	Storage: relative to new row subscripts
- *
- *   xplore[0:m-1]: xplore[i] gives the location of the next (dfs)
- *	unexplored neighbor of i in lsub[*]
- *
- *   segrep[0:nseg-1]: contains the list of supernodal representatives
- *	in topological order of the dfs. A supernode representative is the
- *	last column of a supernode.
- *	The maximum size of segrep[] is n.
- *
- *   repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a
- *	supernodal representative r, repfnz[r] is the location of the first
- *	nonzero in this segment.  It is also used during the dfs: repfnz[r]>0
- *	indicates the supernode r has been explored.
- *	NOTE: There are W of them, each used for one column of a panel.
- *
- *   panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below
- *	the panel diagonal. These are filled in during dpanel_dfs(), and are
- *	used later in the inner LU factorization within the panel.
- *	panel_lsub[]/dense[] pair forms the SPA data structure.
- *	NOTE: There are W of them.
- *
- *   dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
- *		   NOTE: there are W of them.
- *
- *   tempv[0:*]: real temporary used for dense numeric kernels;
- *	The size of this array is defined by NUM_TEMPV() in slu_util.h.
- *	It is also used by the dropping routine ilu_ddrop_row().
- * </pre>
- */
-
-void
-cgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
-	int *etree, void *work, int lwork, int *perm_c, int *perm_r,
-	SuperMatrix *L, SuperMatrix *U, SuperLUStat_t *stat, int *info)
-{
-    /* Local working arrays */
-    NCPformat *Astore;
-    int       *iperm_r = NULL; /* inverse of perm_r; used when
-				  options->Fact == SamePattern_SameRowPerm */
-    int       *iperm_c; /* inverse of perm_c */
-    int       *swap, *iswap; /* swap is used to store the row permutation
-				during the factorization. Initially, it is set
-				to iperm_c (row indeces of Pc*A*Pc').
-				iswap is the inverse of swap. After the
-				factorization, it is equal to perm_r. */
-    int       *iwork;
-    complex   *cwork;
-    int       *segrep, *repfnz, *parent, *xplore;
-    int       *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
-    int       *marker, *marker_relax;
-    complex    *dense, *tempv;
-    float *stempv;
-    int       *relax_end, *relax_fsupc;
-    complex    *a;
-    int       *asub;
-    int       *xa_begin, *xa_end;
-    int       *xsup, *supno;
-    int       *xlsub, *xlusup, *xusub;
-    int       nzlumax;
-    float    *amax; 
-    complex    drop_sum;
-    static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */
-    int       *iwork2;	   /* used by the second dropping rule */
-
-    /* Local scalars */
-    fact_t    fact = options->Fact;
-    double    diag_pivot_thresh = options->DiagPivotThresh;
-    double    drop_tol = options->ILU_DropTol; /* tau */
-    double    fill_ini = options->ILU_FillTol; /* tau^hat */
-    double    gamma = options->ILU_FillFactor;
-    int       drop_rule = options->ILU_DropRule;
-    milu_t    milu = options->ILU_MILU;
-    double    fill_tol;
-    int       pivrow;	/* pivotal row number in the original matrix A */
-    int       nseg1;	/* no of segments in U-column above panel row jcol */
-    int       nseg;	/* no of segments in each U-column */
-    register int jcol;
-    register int kcol;	/* end column of a relaxed snode */
-    register int icol;
-    register int i, k, jj, new_next, iinfo;
-    int       m, n, min_mn, jsupno, fsupc, nextlu, nextu;
-    int       w_def;	/* upper bound on panel width */
-    int       usepr, iperm_r_allocated = 0;
-    int       nnzL, nnzU;
-    int       *panel_histo = stat->panel_histo;
-    flops_t   *ops = stat->ops;
-
-    int       last_drop;/* the last column which the dropping rules applied */
-    int       quota;
-    int       nnzAj;	/* number of nonzeros in A(:,1:j) */
-    int       nnzLj, nnzUj;
-    double    tol_L = drop_tol, tol_U = drop_tol;
-    complex zero = {0.0, 0.0};
-
-    /* Executable */	   
-    iinfo    = 0;
-    m	     = A->nrow;
-    n	     = A->ncol;
-    min_mn   = SUPERLU_MIN(m, n);
-    Astore   = A->Store;
-    a	     = Astore->nzval;
-    asub     = Astore->rowind;
-    xa_begin = Astore->colbeg;
-    xa_end   = Astore->colend;
-
-    /* Allocate storage common to the factor routines */
-    *info = cLUMemInit(fact, work, lwork, m, n, Astore->nnz, panel_size,
-		       gamma, L, U, &Glu, &iwork, &cwork);
-    if ( *info ) return;
-
-    xsup    = Glu.xsup;
-    supno   = Glu.supno;
-    xlsub   = Glu.xlsub;
-    xlusup  = Glu.xlusup;
-    xusub   = Glu.xusub;
-
-    SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
-	     &repfnz, &panel_lsub, &marker_relax, &marker);
-    cSetRWork(m, panel_size, cwork, &dense, &tempv);
-
-    usepr = (fact == SamePattern_SameRowPerm);
-    if ( usepr ) {
-	/* Compute the inverse of perm_r */
-	iperm_r = (int *) intMalloc(m);
-	for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
-	iperm_r_allocated = 1;
-    }
-
-    iperm_c = (int *) intMalloc(n);
-    for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
-    swap = (int *)intMalloc(n);
-    for (k = 0; k < n; k++) swap[k] = iperm_c[k];
-    iswap = (int *)intMalloc(n);
-    for (k = 0; k < n; k++) iswap[k] = perm_c[k];
-    amax = (float *) floatMalloc(panel_size);
-    if (drop_rule & DROP_SECONDARY)
-	iwork2 = (int *)intMalloc(n);
-    else
-	iwork2 = NULL;
-
-    nnzAj = 0;
-    nnzLj = 0;
-    nnzUj = 0;
-    last_drop = SUPERLU_MAX(min_mn - 2 * sp_ienv(7), (int)(min_mn * 0.95));
-
-    /* Identify relaxed snodes */
-    relax_end = (int *) intMalloc(n);
-    relax_fsupc = (int *) intMalloc(n);
-    if ( options->SymmetricMode == YES )
-	ilu_heap_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc);
-    else
-	ilu_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc);
-
-    ifill (perm_r, m, EMPTY);
-    ifill (marker, m * NO_MARKER, EMPTY);
-    supno[0] = -1;
-    xsup[0]  = xlsub[0] = xusub[0] = xlusup[0] = 0;
-    w_def    = panel_size;
-
-    /* Mark the rows used by relaxed supernodes */
-    ifill (marker_relax, m, EMPTY);
-    i = mark_relax(m, relax_end, relax_fsupc, xa_begin, xa_end,
-	         asub, marker_relax);
-#if ( PRNTlevel >= 1)
-    printf("%d relaxed supernodes.\n", i);
-#endif
-
-    /*
-     * Work on one "panel" at a time. A panel is one of the following:
-     *	   (a) a relaxed supernode at the bottom of the etree, or
-     *	   (b) panel_size contiguous columns, defined by the user
-     */
-    for (jcol = 0; jcol < min_mn; ) {
-
-	if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
-	    kcol = relax_end[jcol];	  /* end of the relaxed snode */
-	    panel_histo[kcol-jcol+1]++;
-
-	    /* Drop small rows in the previous supernode. */
-	    if (jcol > 0 && jcol < last_drop) {
-		int first = xsup[supno[jcol - 1]];
-		int last = jcol - 1;
-		int quota;
-
-		/* Compute the quota */
-		if (drop_rule & DROP_PROWS)
-		    quota = gamma * Astore->nnz / m * (m - first) / m
-			    * (last - first + 1);
-		else if (drop_rule & DROP_COLUMN) {
-		    int i;
-		    quota = 0;
-		    for (i = first; i <= last; i++)
-			quota += xa_end[i] - xa_begin[i];
-		    quota = gamma * quota * (m - first) / m;
-		} else if (drop_rule & DROP_AREA)
-		    quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
-			    - nnzLj;
-		else
-		    quota = m * n;
-		fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / min_mn);
-
-		/* Drop small rows */
-                stempv = (float *) tempv;
-		i = ilu_cdrop_row(options, first, last, tol_L, quota, &nnzLj,
-				  &fill_tol, &Glu, stempv, iwork2, 0);
-		/* Reset the parameters */
-		if (drop_rule & DROP_DYNAMIC) {
-		    if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
-			     < nnzLj)
-			tol_L = SUPERLU_MIN(1.0, tol_L * 2.0);
-		    else
-			tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5);
-		}
-		if (fill_tol < 0) iinfo -= (int)fill_tol;
-#ifdef DEBUG
-		num_drop_L += i * (last - first + 1);
-#endif
-	    }
-
-	    /* --------------------------------------
-	     * Factorize the relaxed supernode(jcol:kcol)
-	     * -------------------------------------- */
-	    /* Determine the union of the row structure of the snode */
-	    if ( (*info = ilu_csnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
-					 marker, &Glu)) != 0 )
-		return;
-
-	    nextu    = xusub[jcol];
-	    nextlu   = xlusup[jcol];
-	    jsupno   = supno[jcol];
-	    fsupc    = xsup[jsupno];
-	    new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
-	    nzlumax = Glu.nzlumax;
-	    while ( new_next > nzlumax ) {
-		if ((*info = cLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu)))
-		    return;
-	    }
-
-	    for (icol = jcol; icol <= kcol; icol++) {
-		xusub[icol+1] = nextu;
-
-		amax[0] = 0.0;
-		/* Scatter into SPA dense[*] */
-		for (k = xa_begin[icol]; k < xa_end[icol]; k++) {
-                    register float tmp = c_abs1 (&a[k]);
-		    if (tmp > amax[0]) amax[0] = tmp;
-		    dense[asub[k]] = a[k];
-		}
-		nnzAj += xa_end[icol] - xa_begin[icol];
-		if (amax[0] == 0.0) {
-		    amax[0] = fill_ini;
-#if ( PRNTlevel >= 1)
-		    printf("Column %d is entirely zero!\n", icol);
-		    fflush(stdout);
-#endif
-		}
-
-		/* Numeric update within the snode */
-		csnode_bmod(icol, jsupno, fsupc, dense, tempv, &Glu, stat);
-
-		if (usepr) pivrow = iperm_r[icol];
-		fill_tol = pow(fill_ini, 1.0 - (double)icol / (double)min_mn);
-		if ( (*info = ilu_cpivotL(icol, diag_pivot_thresh, &usepr,
-					  perm_r, iperm_c[icol], swap, iswap,
-					  marker_relax, &pivrow,
-                                          amax[0] * fill_tol, milu, zero,
-                                          &Glu, stat)) ) {
-		    iinfo++;
-		    marker[pivrow] = kcol;
-		}
-
-	    }
-
-	    jcol = kcol + 1;
-
-	} else { /* Work on one panel of panel_size columns */
-
-	    /* Adjust panel_size so that a panel won't overlap with the next
-	     * relaxed snode.
-	     */
-	    panel_size = w_def;
-	    for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++)
-		if ( relax_end[k] != EMPTY ) {
-		    panel_size = k - jcol;
-		    break;
-		}
-	    if ( k == min_mn ) panel_size = min_mn - jcol;
-	    panel_histo[panel_size]++;
-
-	    /* symbolic factor on a panel of columns */
-	    ilu_cpanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
-                          dense, amax, panel_lsub, segrep, repfnz,
-                          marker, parent, xplore, &Glu);
-
-	    /* numeric sup-panel updates in topological order */
-	    cpanel_bmod(m, panel_size, jcol, nseg1, dense,
-			tempv, segrep, repfnz, &Glu, stat);
-
-	    /* Sparse LU within the panel, and below panel diagonal */
-	    for (jj = jcol; jj < jcol + panel_size; jj++) {
-
-		k = (jj - jcol) * m; /* column index for w-wide arrays */
-
-		nseg = nseg1;	/* Begin after all the panel segments */
-
-		nnzAj += xa_end[jj] - xa_begin[jj];
-
-		if ((*info = ilu_ccolumn_dfs(m, jj, perm_r, &nseg,
-					     &panel_lsub[k], segrep, &repfnz[k],
-					     marker, parent, xplore, &Glu)))
-		    return;
-
-		/* Numeric updates */
-		if ((*info = ccolumn_bmod(jj, (nseg - nseg1), &dense[k],
-					  tempv, &segrep[nseg1], &repfnz[k],
-					  jcol, &Glu, stat)) != 0) return;
-
-		/* Make a fill-in position if the column is entirely zero */
-		if (xlsub[jj + 1] == xlsub[jj]) {
-		    register int i, row;
-		    int nextl;
-		    int nzlmax = Glu.nzlmax;
-		    int *lsub = Glu.lsub;
-		    int *marker2 = marker + 2 * m;
-
-		    /* Allocate memory */
-		    nextl = xlsub[jj] + 1;
-		    if (nextl >= nzlmax) {
-			int error = cLUMemXpand(jj, nextl, LSUB, &nzlmax, &Glu);
-			if (error) { *info = error; return; }
-			lsub = Glu.lsub;
-		    }
-		    xlsub[jj + 1]++;
-		    assert(xlusup[jj]==xlusup[jj+1]);
-		    xlusup[jj + 1]++;
-		    Glu.lusup[xlusup[jj]] = zero;
-
-		    /* Choose a row index (pivrow) for fill-in */
-		    for (i = jj; i < n; i++)
-			if (marker_relax[swap[i]] <= jj) break;
-		    row = swap[i];
-		    marker2[row] = jj;
-		    lsub[xlsub[jj]] = row;
-#ifdef DEBUG
-		    printf("Fill col %d.\n", jj);
-		    fflush(stdout);
-#endif
-		}
-
-		/* Computer the quota */
-		if (drop_rule & DROP_PROWS)
-		    quota = gamma * Astore->nnz / m * jj / m;
-		else if (drop_rule & DROP_COLUMN)
-		    quota = gamma * (xa_end[jj] - xa_begin[jj]) *
-			    (jj + 1) / m;
-		else if (drop_rule & DROP_AREA)
-		    quota = gamma * 0.9 * nnzAj * 0.5 - nnzUj;
-		else
-		    quota = m;
-
-		/* Copy the U-segments to ucol[*] and drop small entries */
-		if ((*info = ilu_ccopy_to_ucol(jj, nseg, segrep, &repfnz[k],
-					       perm_r, &dense[k], drop_rule,
-					       milu, amax[jj - jcol] * tol_U,
-					       quota, &drop_sum, &nnzUj, &Glu,
-					       iwork2)) != 0)
-		    return;
-
-		/* Reset the dropping threshold if required */
-		if (drop_rule & DROP_DYNAMIC) {
-		    if (gamma * 0.9 * nnzAj * 0.5 < nnzLj)
-			tol_U = SUPERLU_MIN(1.0, tol_U * 2.0);
-		    else
-			tol_U = SUPERLU_MAX(drop_tol, tol_U * 0.5);
-		}
-
-                cs_mult(&drop_sum, &drop_sum, MILU_ALPHA);
-		if (usepr) pivrow = iperm_r[jj];
-		fill_tol = pow(fill_ini, 1.0 - (double)jj / (double)min_mn);
-		if ( (*info = ilu_cpivotL(jj, diag_pivot_thresh, &usepr, perm_r,
-					  iperm_c[jj], swap, iswap,
-					  marker_relax, &pivrow,
-					  amax[jj - jcol] * fill_tol, milu,
-					  drop_sum, &Glu, stat)) ) {
-		    iinfo++;
-		    marker[m + pivrow] = jj;
-		    marker[2 * m + pivrow] = jj;
-		}
-
-		/* Reset repfnz[] for this column */
-		resetrep_col (nseg, segrep, &repfnz[k]);
-
-		/* Start a new supernode, drop the previous one */
-		if (jj > 0 && supno[jj] > supno[jj - 1] && jj < last_drop) {
-		    int first = xsup[supno[jj - 1]];
-		    int last = jj - 1;
-		    int quota;
-
-		    /* Compute the quota */
-		    if (drop_rule & DROP_PROWS)
-			quota = gamma * Astore->nnz / m * (m - first) / m
-				* (last - first + 1);
-		    else if (drop_rule & DROP_COLUMN) {
-			int i;
-			quota = 0;
-			for (i = first; i <= last; i++)
-			    quota += xa_end[i] - xa_begin[i];
-			quota = gamma * quota * (m - first) / m;
-		    } else if (drop_rule & DROP_AREA)
-			quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0)
-				/ m) - nnzLj;
-		    else
-			quota = m * n;
-		    fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) /
-			    (double)min_mn);
-
-		    /* Drop small rows */
-                    stempv = (float *) tempv;
-		    i = ilu_cdrop_row(options, first, last, tol_L, quota,
-				      &nnzLj, &fill_tol, &Glu, stempv, iwork2,
-				      1);
-
-		    /* Reset the parameters */
-		    if (drop_rule & DROP_DYNAMIC) {
-			if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
-				< nnzLj)
-			    tol_L = SUPERLU_MIN(1.0, tol_L * 2.0);
-			else
-			    tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5);
-		    }
-		    if (fill_tol < 0) iinfo -= (int)fill_tol;
-#ifdef DEBUG
-		    num_drop_L += i * (last - first + 1);
-#endif
-		} /* if start a new supernode */
-
-	    } /* for */
-
-	    jcol += panel_size; /* Move to the next panel */
-
-	} /* else */
-
-    } /* for */
-
-    *info = iinfo;
-
-    if ( m > n ) {
-	k = 0;
-	for (i = 0; i < m; ++i)
-	    if ( perm_r[i] == EMPTY ) {
-		perm_r[i] = n + k;
-		++k;
-	    }
-    }
-
-    ilu_countnz(min_mn, &nnzL, &nnzU, &Glu);
-    fixupL(min_mn, perm_r, &Glu);
-
-    cLUWorkFree(iwork, cwork, &Glu); /* Free work space and compress storage */
-
-    if ( fact == SamePattern_SameRowPerm ) {
-	/* L and U structures may have changed due to possibly different
-	   pivoting, even though the storage is available.
-	   There could also be memory expansions, so the array locations
-	   may have changed, */
-	((SCformat *)L->Store)->nnz = nnzL;
-	((SCformat *)L->Store)->nsuper = Glu.supno[n];
-	((SCformat *)L->Store)->nzval = Glu.lusup;
-	((SCformat *)L->Store)->nzval_colptr = Glu.xlusup;
-	((SCformat *)L->Store)->rowind = Glu.lsub;
-	((SCformat *)L->Store)->rowind_colptr = Glu.xlsub;
-	((NCformat *)U->Store)->nnz = nnzU;
-	((NCformat *)U->Store)->nzval = Glu.ucol;
-	((NCformat *)U->Store)->rowind = Glu.usub;
-	((NCformat *)U->Store)->colptr = Glu.xusub;
-    } else {
-	cCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup,
-				 Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno,
-				 Glu.xsup, SLU_SC, SLU_C, SLU_TRLU);
-	cCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol,
-			       Glu.usub, Glu.xusub, SLU_NC, SLU_C, SLU_TRU);
-    }
-
-    ops[FACT] += ops[TRSV] + ops[GEMV];
-    stat->expansions = --(Glu.num_expansions);
-
-    if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
-    SUPERLU_FREE (iperm_c);
-    SUPERLU_FREE (relax_end);
-    SUPERLU_FREE (swap);
-    SUPERLU_FREE (iswap);
-    SUPERLU_FREE (relax_fsupc);
-    SUPERLU_FREE (amax);
-    if ( iwork2 ) SUPERLU_FREE (iwork2);
-
-}
diff --git a/thirdparty/superlu/SuperLU_4.1/SRC/cgssvx.c.bak b/thirdparty/superlu/SuperLU_4.1/SRC/cgssvx.c.bak
deleted file mode 100644
index 428ecb0..0000000
--- a/thirdparty/superlu/SuperLU_4.1/SRC/cgssvx.c.bak
+++ /dev/null
@@ -1,619 +0,0 @@
-
-/*! @file cgssvx.c
- * \brief Solves the system of linear equations A*X=B or A'*X=B
- *
- * <pre>
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- * </pre>
- */
-#include "slu_cdefs.h"
-
-/*! \brief
- *
- * <pre>
- * Purpose
- * =======
- *
- * CGSSVX solves the system of linear equations A*X=B or A'*X=B, using
- * the LU factorization from cgstrf(). Error bounds on the solution and
- * a condition estimate are also provided. It performs the following steps:
- *
- *   1. If A is stored column-wise (A->Stype = SLU_NC):
- *  
- *      1.1. If options->Equil = YES, scaling factors are computed to
- *           equilibrate the system:
- *           options->Trans = NOTRANS:
- *               diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
- *           options->Trans = TRANS:
- *               (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
- *           options->Trans = CONJ:
- *               (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
- *           Whether or not the system will be equilibrated depends on the
- *           scaling of the matrix A, but if equilibration is used, A is
- *           overwritten by diag(R)*A*diag(C) and B by diag(R)*B
- *           (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans
- *           = TRANS or CONJ).
- *
- *      1.2. Permute columns of A, forming A*Pc, where Pc is a permutation
- *           matrix that usually preserves sparsity.
- *           For more details of this step, see sp_preorder.c.
- *
- *      1.3. If options->Fact != FACTORED, the LU decomposition is used to
- *           factor the matrix A (after equilibration if options->Equil = YES)
- *           as Pr*A*Pc = L*U, with Pr determined by partial pivoting.
- *
- *      1.4. Compute the reciprocal pivot growth factor.
- *
- *      1.5. If some U(i,i) = 0, so that U is exactly singular, then the
- *           routine returns with info = i. Otherwise, the factored form of 
- *           A is used to estimate the condition number of the matrix A. If
- *           the reciprocal of the condition number is less than machine
- *           precision, info = A->ncol+1 is returned as a warning, but the
- *           routine still goes on to solve for X and computes error bounds
- *           as described below.
- *
- *      1.6. The system of equations is solved for X using the factored form
- *           of A.
- *
- *      1.7. If options->IterRefine != NOREFINE, iterative refinement is
- *           applied to improve the computed solution matrix and calculate
- *           error bounds and backward error estimates for it.
- *
- *      1.8. If equilibration was used, the matrix X is premultiplied by
- *           diag(C) (if options->Trans = NOTRANS) or diag(R)
- *           (if options->Trans = TRANS or CONJ) so that it solves the
- *           original system before equilibration.
- *
- *   2. If A is stored row-wise (A->Stype = SLU_NR), apply the above algorithm
- *      to the transpose of A:
- *
- *      2.1. If options->Equil = YES, scaling factors are computed to
- *           equilibrate the system:
- *           options->Trans = NOTRANS:
- *               diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
- *           options->Trans = TRANS:
- *               (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
- *           options->Trans = CONJ:
- *               (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
- *           Whether or not the system will be equilibrated depends on the
- *           scaling of the matrix A, but if equilibration is used, A' is
- *           overwritten by diag(R)*A'*diag(C) and B by diag(R)*B 
- *           (if trans='N') or diag(C)*B (if trans = 'T' or 'C').
- *
- *      2.2. Permute columns of transpose(A) (rows of A), 
- *           forming transpose(A)*Pc, where Pc is a permutation matrix that 
- *           usually preserves sparsity.
- *           For more details of this step, see sp_preorder.c.
- *
- *      2.3. If options->Fact != FACTORED, the LU decomposition is used to
- *           factor the transpose(A) (after equilibration if 
- *           options->Fact = YES) as Pr*transpose(A)*Pc = L*U with the
- *           permutation Pr determined by partial pivoting.
- *
- *      2.4. Compute the reciprocal pivot growth factor.
- *
- *      2.5. If some U(i,i) = 0, so that U is exactly singular, then the
- *           routine returns with info = i. Otherwise, the factored form 
- *           of transpose(A) is used to estimate the condition number of the
- *           matrix A. If the reciprocal of the condition number
- *           is less than machine precision, info = A->nrow+1 is returned as
- *           a warning, but the routine still goes on to solve for X and
- *           computes error bounds as described below.
- *
- *      2.6. The system of equations is solved for X using the factored form
- *           of transpose(A).
- *
- *      2.7. If options->IterRefine != NOREFINE, iterative refinement is
- *           applied to improve the computed solution matrix and calculate
- *           error bounds and backward error estimates for it.
- *
- *      2.8. If equilibration was used, the matrix X is premultiplied by
- *           diag(C) (if options->Trans = NOTRANS) or diag(R) 
- *           (if options->Trans = TRANS or CONJ) so that it solves the
- *           original system before equilibration.
- *
- *   See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * options (input) superlu_options_t*
- *         The structure defines the input parameters to control
- *         how the LU decomposition will be performed and how the
- *         system will be solved.
- *
- * A       (input/output) SuperMatrix*
- *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
- *         of the linear equations is A->nrow. Currently, the type of A can be:
- *         Stype = SLU_NC or SLU_NR, Dtype = SLU_D, Mtype = SLU_GE.
- *         In the future, more general A may be handled.
- *
- *         On entry, If options->Fact = FACTORED and equed is not 'N', 
- *         then A must have been equilibrated by the scaling factors in
- *         R and/or C.  
- *         On exit, A is not modified if options->Equil = NO, or if 
- *         options->Equil = YES but equed = 'N' on exit.
- *         Otherwise, if options->Equil = YES and equed is not 'N',
- *         A is scaled as follows:
- *         If A->Stype = SLU_NC:
- *           equed = 'R':  A := diag(R) * A
- *           equed = 'C':  A := A * diag(C)
- *           equed = 'B':  A := diag(R) * A * diag(C).
- *         If A->Stype = SLU_NR:
- *           equed = 'R':  transpose(A) := diag(R) * transpose(A)
- *           equed = 'C':  transpose(A) := transpose(A) * diag(C)
- *           equed = 'B':  transpose(A) := diag(R) * transpose(A) * diag(C).
- *
- * perm_c  (input/output) int*
- *	   If A->Stype = SLU_NC, Column permutation vector of size A->ncol,
- *         which defines the permutation matrix Pc; perm_c[i] = j means
- *         column i of A is in position j in A*Pc.
- *         On exit, perm_c may be overwritten by the product of the input
- *         perm_c and a permutation that postorders the elimination tree
- *         of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree
- *         is already in postorder.
- *
- *         If A->Stype = SLU_NR, column permutation vector of size A->nrow,
- *         which describes permutation of columns of transpose(A) 
- *         (rows of A) as described above.
- * 
- * perm_r  (input/output) int*
- *         If A->Stype = SLU_NC, row permutation vector of size A->nrow, 
- *         which defines the permutation matrix Pr, and is determined
- *         by partial pivoting.  perm_r[i] = j means row i of A is in 
- *         position j in Pr*A.
- *
- *         If A->Stype = SLU_NR, permutation vector of size A->ncol, which
- *         determines permutation of rows of transpose(A)
- *         (columns of A) as described above.
- *
- *         If options->Fact = SamePattern_SameRowPerm, the pivoting routine
- *         will try to use the input perm_r, unless a certain threshold
- *         criterion is violated. In that case, perm_r is overwritten by a
- *         new permutation determined by partial pivoting or diagonal
- *         threshold pivoting.
- *         Otherwise, perm_r is output argument.
- * 
- * etree   (input/output) int*,  dimension (A->ncol)
- *         Elimination tree of Pc'*A'*A*Pc.
- *         If options->Fact != FACTORED and options->Fact != DOFACT,
- *         etree is an input argument, otherwise it is an output argument.
- *         Note: etree is a vector of parent pointers for a forest whose
- *         vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
- *
- * equed   (input/output) char*
- *         Specifies the form of equilibration that was done.
- *         = 'N': No equilibration.
- *         = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
- *         = 'C': Column equilibration, i.e., A was postmultiplied by diag(C).
- *         = 'B': Both row and column equilibration, i.e., A was replaced 
- *                by diag(R)*A*diag(C).
- *         If options->Fact = FACTORED, equed is an input argument,
- *         otherwise it is an output argument.
- *
- * R       (input/output) float*, dimension (A->nrow)
- *         The row scale factors for A or transpose(A).
- *         If equed = 'R' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
- *             (if A->Stype = SLU_NR) is multiplied on the left by diag(R).
- *         If equed = 'N' or 'C', R is not accessed.
- *         If options->Fact = FACTORED, R is an input argument,
- *             otherwise, R is output.
- *         If options->zFact = FACTORED and equed = 'R' or 'B', each element
- *             of R must be positive.
- * 
- * C       (input/output) float*, dimension (A->ncol)
- *         The column scale factors for A or transpose(A).
- *         If equed = 'C' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
- *             (if A->Stype = SLU_NR) is multiplied on the right by diag(C).
- *         If equed = 'N' or 'R', C is not accessed.
- *         If options->Fact = FACTORED, C is an input argument,
- *             otherwise, C is output.
- *         If options->Fact = FACTORED and equed = 'C' or 'B', each element
- *             of C must be positive.
- *         
- * L       (output) SuperMatrix*
- *	   The factor L from the factorization
- *             Pr*A*Pc=L*U              (if A->Stype SLU_= NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses compressed row subscripts storage for supernodes, i.e.,
- *         L has types: Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
- *
- * U       (output) SuperMatrix*
- *	   The factor U from the factorization
- *             Pr*A*Pc=L*U              (if A->Stype = SLU_NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses column-wise storage scheme, i.e., U has types:
- *         Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_TRU.
- *
- * work    (workspace/output) void*, size (lwork) (in bytes)
- *         User supplied workspace, should be large enough
- *         to hold data structures for factors L and U.
- *         On exit, if fact is not 'F', L and U point to this array.
- *
- * lwork   (input) int
- *         Specifies the size of work array in bytes.
- *         = 0:  allocate space internally by system malloc;
- *         > 0:  use user-supplied work array of length lwork in bytes,
- *               returns error if space runs out.
- *         = -1: the routine guesses the amount of space needed without
- *               performing the factorization, and returns it in
- *               mem_usage->total_needed; no other side effects.
- *
- *         See argument 'mem_usage' for memory usage statistics.
- *
- * B       (input/output) SuperMatrix*
- *         B has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE.
- *         On entry, the right hand side matrix.
- *         If B->ncol = 0, only LU decomposition is performed, the triangular
- *                         solve is skipped.
- *         On exit,
- *            if equed = 'N', B is not modified; otherwise
- *            if A->Stype = SLU_NC:
- *               if options->Trans = NOTRANS and equed = 'R' or 'B',
- *                  B is overwritten by diag(R)*B;
- *               if options->Trans = TRANS or CONJ and equed = 'C' of 'B',
- *                  B is overwritten by diag(C)*B;
- *            if A->Stype = SLU_NR:
- *               if options->Trans = NOTRANS and equed = 'C' or 'B',
- *                  B is overwritten by diag(C)*B;
- *               if options->Trans = TRANS or CONJ and equed = 'R' of 'B',
- *                  B is overwritten by diag(R)*B.
- *
- * X       (output) SuperMatrix*
- *         X has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE. 
- *         If info = 0 or info = A->ncol+1, X contains the solution matrix
- *         to the original system of equations. Note that A and B are modified
- *         on exit if equed is not 'N', and the solution to the equilibrated
- *         system is inv(diag(C))*X if options->Trans = NOTRANS and
- *         equed = 'C' or 'B', or inv(diag(R))*X if options->Trans = 'T' or 'C'
- *         and equed = 'R' or 'B'.
- *
- * recip_pivot_growth (output) float*
- *         The reciprocal pivot growth factor max_j( norm(A_j)/norm(U_j) ).
- *         The infinity norm is used. If recip_pivot_growth is much less
- *         than 1, the stability of the LU factorization could be poor.
- *
- * rcond   (output) float*
- *         The estimate of the reciprocal condition number of the matrix A
- *         after equilibration (if done). If rcond is less than the machine
- *         precision (in particular, if rcond = 0), the matrix is singular
- *         to working precision. This condition is indicated by a return
- *         code of info > 0.
- *
- * FERR    (output) float*, dimension (B->ncol)   
- *         The estimated forward error bound for each solution vector   
- *         X(j) (the j-th column of the solution matrix X).   
- *         If XTRUE is the true solution corresponding to X(j), FERR(j) 
- *         is an estimated upper bound for the magnitude of the largest 
- *         element in (X(j) - XTRUE) divided by the magnitude of the   
- *         largest element in X(j).  The estimate is as reliable as   
- *         the estimate for RCOND, and is almost always a slight   
- *         overestimate of the true error.
- *         If options->IterRefine = NOREFINE, ferr = 1.0.
- *
- * BERR    (output) float*, dimension (B->ncol)
- *         The componentwise relative backward error of each solution   
- *         vector X(j) (i.e., the smallest relative change in   
- *         any element of A or B that makes X(j) an exact solution).
- *         If options->IterRefine = NOREFINE, berr = 1.0.
- *
- * mem_usage (output) mem_usage_t*
- *         Record the memory usage statistics, consisting of following fields:
- *         - for_lu (float)
- *           The amount of space used in bytes for L\U data structures.
- *         - total_needed (float)
- *           The amount of space needed in bytes to perform factorization.
- *         - expansions (int)
- *           The number of memory expansions during the LU factorization.
- *
- * stat   (output) SuperLUStat_t*
- *        Record the statistics on runtime and floating-point operation count.
- *        See slu_util.h for the definition of 'SuperLUStat_t'.
- *
- * info    (output) int*
- *         = 0: successful exit   
- *         < 0: if info = -i, the i-th argument had an illegal value   
- *         > 0: if info = i, and i is   
- *              <= A->ncol: U(i,i) is exactly zero. The factorization has   
- *                    been completed, but the factor U is exactly   
- *                    singular, so the solution and error bounds   
- *                    could not be computed.   
- *              = A->ncol+1: U is nonsingular, but RCOND is less than machine
- *                    precision, meaning that the matrix is singular to
- *                    working precision. Nevertheless, the solution and
- *                    error bounds are computed because there are a number
- *                    of situations where the computed solution can be more
- *                    accurate than the value of RCOND would suggest.   
- *              > A->ncol+1: number of bytes allocated when memory allocation
- *                    failure occurred, plus A->ncol.
- * </pre>
- */
-
-void
-cgssvx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
-       int *etree, char *equed, float *R, float *C,
-       SuperMatrix *L, SuperMatrix *U, void *work, int lwork,
-       SuperMatrix *B, SuperMatrix *X, float *recip_pivot_growth, 
-       float *rcond, float *ferr, float *berr, 
-       mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info )
-{
-
-
-    DNformat  *Bstore, *Xstore;
-    complex    *Bmat, *Xmat;
-    int       ldb, ldx, nrhs;
-    SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/
-    SuperMatrix AC; /* Matrix postmultiplied by Pc */
-    int       colequ, equil, nofact, notran, rowequ, permc_spec;
-    trans_t   trant;
-    char      norm[1];
-    int       i, j, info1;
-    float    amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin;
-    int       relax, panel_size;
-    float    diag_pivot_thresh;
-    double    t0;      /* temporary time */
-    double    *utime;
-
-    /* External functions */
-    extern float clangs(char *, SuperMatrix *);
-
-    Bstore = B->Store;
-    Xstore = X->Store;
-    Bmat   = Bstore->nzval;
-    Xmat   = Xstore->nzval;
-    ldb    = Bstore->lda;
-    ldx    = Xstore->lda;
-    nrhs   = B->ncol;
-
-    *info = 0;
-    nofact = (options->Fact != FACTORED);
-    equil = (options->Equil == YES);
-    notran = (options->Trans == NOTRANS);
-    if ( nofact ) {
-	*(unsigned char *)equed = 'N';
-	rowequ = FALSE;
-	colequ = FALSE;
-    } else {
-	rowequ = lsame_(equed, "R") || lsame_(equed, "B");
-	colequ = lsame_(equed, "C") || lsame_(equed, "B");
-	smlnum = slamch_("Safe minimum");
-	bignum = 1. / smlnum;
-    }
-
-#if 0
-printf("dgssvx: Fact=%4d, Trans=%4d, equed=%c\n",
-       options->Fact, options->Trans, *equed);
-#endif
-
-    /* Test the input parameters */
-    if (!nofact && options->Fact != DOFACT && options->Fact != SamePattern &&
-	options->Fact != SamePattern_SameRowPerm &&
-	!notran && options->Trans != TRANS && options->Trans != CONJ &&
-	!equil && options->Equil != NO)
-	*info = -1;
-    else if ( A->nrow != A->ncol || A->nrow < 0 ||
-	      (A->Stype != SLU_NC && A->Stype != SLU_NR) ||
-	      A->Dtype != SLU_C || A->Mtype != SLU_GE )
-	*info = -2;
-    else if (options->Fact == FACTORED &&
-	     !(rowequ || colequ || lsame_(equed, "N")))
-	*info = -6;
-    else {
-	if (rowequ) {
-	    rcmin = bignum;
-	    rcmax = 0.;
-	    for (j = 0; j < A->nrow; ++j) {
-		rcmin = SUPERLU_MIN(rcmin, R[j]);
-		rcmax = SUPERLU_MAX(rcmax, R[j]);
-	    }
-	    if (rcmin <= 0.) *info = -7;
-	    else if ( A->nrow > 0)
-		rowcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum);
-	    else rowcnd = 1.;
-	}
-	if (colequ && *info == 0) {
-	    rcmin = bignum;
-	    rcmax = 0.;
-	    for (j = 0; j < A->nrow; ++j) {
-		rcmin = SUPERLU_MIN(rcmin, C[j]);
-		rcmax = SUPERLU_MAX(rcmax, C[j]);
-	    }
-	    if (rcmin <= 0.) *info = -8;
-	    else if (A->nrow > 0)
-		colcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum);
-	    else colcnd = 1.;
-	}
-	if (*info == 0) {
-	    if ( lwork < -1 ) *info = -12;
-	    else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
-		      B->Stype != SLU_DN || B->Dtype != SLU_C || 
-		      B->Mtype != SLU_GE )
-		*info = -13;
-	    else if ( X->ncol < 0 || Xstore->lda < SUPERLU_MAX(0, A->nrow) ||
-		      (B->ncol != 0 && B->ncol != X->ncol) ||
-                      X->Stype != SLU_DN ||
-		      X->Dtype != SLU_C || X->Mtype != SLU_GE )
-		*info = -14;
-	}
-    }
-    if (*info != 0) {
-	i = -(*info);
-	xerbla_("cgssvx", &i);
-	return;
-    }
-    
-    /* Initialization for factor parameters */
-    panel_size = sp_ienv(1);
-    relax      = sp_ienv(2);
-    diag_pivot_thresh = options->DiagPivotThresh;
-
-    utime = stat->utime;
-    
-    /* Convert A to SLU_NC format when necessary. */
-    if ( A->Stype == SLU_NR ) {
-	NRformat *Astore = A->Store;
-	AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
-	cCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, 
-			       Astore->nzval, Astore->colind, Astore->rowptr,
-			       SLU_NC, A->Dtype, A->Mtype);
-	if ( notran ) { /* Reverse the transpose argument. */
-	    trant = TRANS;
-	    notran = 0;
-	} else {
-	    trant = NOTRANS;
-	    notran = 1;
-	}
-    } else { /* A->Stype == SLU_NC */
-	trant = options->Trans;
-	AA = A;
-    }
-
-    if ( nofact && equil ) {
-	t0 = SuperLU_timer_();
-	/* Compute row and column scalings to equilibrate the matrix A. */
-	cgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1);
-	
-	if ( info1 == 0 ) {
-	    /* Equilibrate matrix A. */
-	    claqgs(AA, R, C, rowcnd, colcnd, amax, equed);
-	    rowequ = lsame_(equed, "R") || lsame_(equed, "B");
-	    colequ = lsame_(equed, "C") || lsame_(equed, "B");
-	}
-	utime[EQUIL] = SuperLU_timer_() - t0;
-    }
-
-    if ( nrhs > 0 ) {
-        /* Scale the right hand side if equilibration was performed. */
-        if ( notran ) {
-	    if ( rowequ ) {
-	        for (j = 0; j < nrhs; ++j)
-		    for (i = 0; i < A->nrow; ++i) {
-                        cs_mult(&Bmat[i+j*ldb], &Bmat[i+j*ldb], R[i]);
-	            }
-	    }
-        } else if ( colequ ) {
-	    for (j = 0; j < nrhs; ++j)
-	        for (i = 0; i < A->nrow; ++i) {
-                    cs_mult(&Bmat[i+j*ldb], &Bmat[i+j*ldb], C[i]);
-	        }
-        }
-    }
-
-    if ( nofact ) {
-	
-        t0 = SuperLU_timer_();
-	/*
-	 * Gnet column permutation vector perm_c[], according to permc_spec:
-	 *   permc_spec = NATURAL:  natural ordering 
-	 *   permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
-	 *   permc_spec = MMD_ATA:  minimum degree on structure of A'*A
-	 *   permc_spec = COLAMD:   approximate minimum degree column ordering
-	 *   permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
-	 */
-	permc_spec = options->ColPerm;
-	if ( permc_spec != MY_PERMC && options->Fact == DOFACT )
-            get_perm_c(permc_spec, AA, perm_c);
-	utime[COLPERM] = SuperLU_timer_() - t0;
-
-	t0 = SuperLU_timer_();
-	sp_preorder(options, AA, perm_c, etree, &AC);
-	utime[ETREE] = SuperLU_timer_() - t0;
-    
-/*	printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", 
-	       relax, panel_size, sp_ienv(3), sp_ienv(4));
-	fflush(stdout); */
-	
-	/* Compute the LU factorization of A*Pc. */
-	t0 = SuperLU_timer_();
-	cgstrf(options, &AC, relax, panel_size, etree,
-                work, lwork, perm_c, perm_r, L, U, stat, info);
-	utime[FACT] = SuperLU_timer_() - t0;
-	
-	if ( lwork == -1 ) {
-	    mem_usage->total_needed = *info - A->ncol;
-	    return;
-	}
-    }
-
-    if ( options->PivotGrowth ) {
-        if ( *info > 0 ) {
-	    if ( *info <= A->ncol ) {
-	        /* Compute the reciprocal pivot growth factor of the leading
-	           rank-deficient *info columns of A. */
-	        *recip_pivot_growth = cPivotGrowth(*info, AA, perm_c, L, U);
-	    }
-	    return;
-        }
-
-        /* Compute the reciprocal pivot growth factor *recip_pivot_growth. */
-        *recip_pivot_growth = cPivotGrowth(A->ncol, AA, perm_c, L, U);
-    }
-
-    if ( options->ConditionNumber ) {
-        /* Estimate the reciprocal of the condition number of A. */
-        t0 = SuperLU_timer_();
-        if ( notran ) {
-	    *(unsigned char *)norm = '1';
-        } else {
-	    *(unsigned char *)norm = 'I';
-        }
-        anorm = clangs(norm, AA);
-        cgscon(norm, L, U, anorm, rcond, stat, info);
-        utime[RCOND] = SuperLU_timer_() - t0;
-    }
-    
-    if ( nrhs > 0 ) {
-        /* Compute the solution matrix X. */
-        for (j = 0; j < nrhs; j++)  /* Save a copy of the right hand sides */
-            for (i = 0; i < B->nrow; i++)
-	        Xmat[i + j*ldx] = Bmat[i + j*ldb];
-    
-        t0 = SuperLU_timer_();
-        cgstrs (trant, L, U, perm_c, perm_r, X, stat, info);
-        utime[SOLVE] = SuperLU_timer_() - t0;
-    
-        /* Use iterative refinement to improve the computed solution and compute
-           error bounds and backward error estimates for it. */
-        t0 = SuperLU_timer_();
-        if ( options->IterRefine != NOREFINE ) {
-            cgsrfs(trant, AA, L, U, perm_c, perm_r, equed, R, C, B,
-                   X, ferr, berr, stat, info);
-        } else {
-            for (j = 0; j < nrhs; ++j) ferr[j] = berr[j] = 1.0;
-        }
-        utime[REFINE] = SuperLU_timer_() - t0;
-
-        /* Transform the solution matrix X to a solution of the original system. */
-        if ( notran ) {
-	    if ( colequ ) {
-	        for (j = 0; j < nrhs; ++j)
-		    for (i = 0; i < A->nrow; ++i) {
-                        cs_mult(&Xmat[i+j*ldx], &Xmat[i+j*ldx], C[i]);
-	            }
-	    }
-        } else if ( rowequ ) {
-	    for (j = 0; j < nrhs; ++j)
-	        for (i = 0; i < A->nrow; ++i) {
-                    cs_mult(&Xmat[i+j*ldx], &Xmat[i+j*ldx], R[i]);
-                }
-        }
-    } /* end if nrhs > 0 */
-
-    if ( options->ConditionNumber ) {
-        /* Set INFO = A->ncol+1 if the matrix is singular to working precision. */
-        if ( *rcond < slamch_("E") ) *info = A->ncol + 1;
-    }
-
-    if ( nofact ) {
-        cQuerySpace(L, U, mem_usage);
-        Destroy_CompCol_Permuted(&AC);
-    }
-    if ( A->Stype == SLU_NR ) {
-	Destroy_SuperMatrix_Store(AA);
-	SUPERLU_FREE(AA);
-    }
-
-}
diff --git a/thirdparty/superlu/SuperLU_4.1/SRC/clacon.c.bak b/thirdparty/superlu/SuperLU_4.1/SRC/clacon.c.bak
deleted file mode 100644
index 3b826f5..0000000
--- a/thirdparty/superlu/SuperLU_4.1/SRC/clacon.c.bak
+++ /dev/null
@@ -1,221 +0,0 @@
-
-/*! @file clacon.c
- * \brief Estimates the 1-norm
- *
- * <pre>
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- * </pre>
- */
-#include <math.h>
-#include "slu_Cnames.h"
-#include "slu_scomplex.h"
-
-/*! \brief
- *
- * <pre>
- *   Purpose   
- *   =======   
- *
- *   CLACON estimates the 1-norm of a square matrix A.   
- *   Reverse communication is used for evaluating matrix-vector products. 
- * 
- *
- *   Arguments   
- *   =========   
- *
- *   N      (input) INT
- *          The order of the matrix.  N >= 1.   
- *
- *   V      (workspace) COMPLEX PRECISION array, dimension (N)   
- *          On the final return, V = A*W,  where  EST = norm(V)/norm(W)   
- *          (W is not returned).   
- *
- *   X      (input/output) COMPLEX PRECISION array, dimension (N)   
- *          On an intermediate return, X should be overwritten by   
- *                A * X,   if KASE=1,   
- *                A' * X,  if KASE=2,
- *          where A' is the conjugate transpose of A,
- *         and CLACON must be re-called with all the other parameters   
- *          unchanged.   
- *
- *
- *   EST    (output) FLOAT PRECISION   
- *          An estimate (a lower bound) for norm(A).   
- *
- *   KASE   (input/output) INT
- *          On the initial call to CLACON, KASE should be 0.   
- *          On an intermediate return, KASE will be 1 or 2, indicating   
- *          whether X should be overwritten by A * X  or A' * X.   
- *          On the final return from CLACON, KASE will again be 0.   
- *
- *   Further Details   
- *   ======= =======   
- *
- *   Contributed by Nick Higham, University of Manchester.   
- *   Originally named CONEST, dated March 16, 1988.   
- *
- *   Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of 
- *   a real or complex matrix, with applications to condition estimation", 
- *   ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988.   
- *   ===================================================================== 
- * </pre>
- */
-
-int
-clacon_(int *n, complex *v, complex *x, float *est, int *kase)
-
-{
-
-
-    /* Table of constant values */
-    int c__1 = 1;
-    complex      zero = {0.0, 0.0};
-    complex      one = {1.0, 0.0};
-
-    /* System generated locals */
-    float d__1;
-    
-    /* Local variables */
-    static int iter;
-    static int jump, jlast;
-    static float altsgn, estold;
-    static int i, j;
-    float temp;
-    float safmin;
-    extern double slamch_(char *);
-    extern int icmax1_(int *, complex *, int *);
-    extern double scsum1_(int *, complex *, int *);
-
-    safmin = slamch_("Safe minimum");
-    if ( *kase == 0 ) {
-	for (i = 0; i < *n; ++i) {
-	    x[i].r = 1. / (float) (*n);
-	    x[i].i = 0.;
-	}
-	*kase = 1;
-	jump = 1;
-	return 0;
-    }
-
-    switch (jump) {
-	case 1:  goto L20;
-	case 2:  goto L40;
-	case 3:  goto L70;
-	case 4:  goto L110;
-	case 5:  goto L140;
-    }
-
-    /*     ................ ENTRY   (JUMP = 1)   
-	   FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY A*X. */
-  L20:
-    if (*n == 1) {
-	v[0] = x[0];
-	*est = c_abs(&v[0]);
-	/*        ... QUIT */
-	goto L150;
-    }
-    *est = scsum1_(n, x, &c__1);
-
-    for (i = 0; i < *n; ++i) {
-	d__1 = c_abs(&x[i]);
-	if (d__1 > safmin) {
-	    d__1 = 1 / d__1;
-	    x[i].r *= d__1;
-	    x[i].i *= d__1;
-	} else {
-	    x[i] = one;
-	}
-    }
-    *kase = 2;
-    jump = 2;
-    return 0;
-
-    /*     ................ ENTRY   (JUMP = 2)   
-	   FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. */
-L40:
-    j = icmax1_(n, &x[0], &c__1);
-    --j;
-    iter = 2;
-
-    /*     MAIN LOOP - ITERATIONS 2,3,...,ITMAX. */
-L50:
-    for (i = 0; i < *n; ++i) x[i] = zero;
-    x[j] = one;
-    *kase = 1;
-    jump = 3;
-    return 0;
-
-    /*     ................ ENTRY   (JUMP = 3)   
-	   X HAS BEEN OVERWRITTEN BY A*X. */
-L70:
-#ifdef _CRAY
-    CCOPY(n, x, &c__1, v, &c__1);
-#else
-    ccopy_(n, x, &c__1, v, &c__1);
-#endif
-    estold = *est;
-    *est = scsum1_(n, v, &c__1);
-
-
-L90:
-    /*     TEST FOR CYCLING. */
-    if (*est <= estold) goto L120;
-
-    for (i = 0; i < *n; ++i) {
-	d__1 = c_abs(&x[i]);
-	if (d__1 > safmin) {
-	    d__1 = 1 / d__1;
-	    x[i].r *= d__1;
-	    x[i].i *= d__1;
-	} else {
-	    x[i] = one;
-	}
-    }
-    *kase = 2;
-    jump = 4;
-    return 0;
-
-    /*     ................ ENTRY   (JUMP = 4)   
-	   X HAS BEEN OVERWRITTEN BY TRANDPOSE(A)*X. */
-L110:
-    jlast = j;
-    j = icmax1_(n, &x[0], &c__1);
-    --j;
-    if (x[jlast].r != (d__1 = x[j].r, fabs(d__1)) && iter < 5) {
-	++iter;
-	goto L50;
-    }
-
-    /*     ITERATION COMPLETE.  FINAL STAGE. */
-L120:
-    altsgn = 1.;
-    for (i = 1; i <= *n; ++i) {
-	x[i-1].r = altsgn * ((float)(i - 1) / (float)(*n - 1) + 1.);
-	x[i-1].i = 0.;
-	altsgn = -altsgn;
-    }
-    *kase = 1;
-    jump = 5;
-    return 0;
-    
-    /*     ................ ENTRY   (JUMP = 5)   
-	   X HAS BEEN OVERWRITTEN BY A*X. */
-L140:
-    temp = scsum1_(n, x, &c__1) / (float)(*n * 3) * 2.;
-    if (temp > *est) {
-#ifdef _CRAY
-	CCOPY(n, &x[0], &c__1, &v[0], &c__1);
-#else
-	ccopy_(n, &x[0], &c__1, &v[0], &c__1);
-#endif
-	*est = temp;
-    }
-
-L150:
-    *kase = 0;
-    return 0;
-
-} /* clacon_ */
diff --git a/thirdparty/superlu/SuperLU_4.1/SRC/dgsitrf.c.bak b/thirdparty/superlu/SuperLU_4.1/SRC/dgsitrf.c.bak
deleted file mode 100644
index 969b5c1..0000000
--- a/thirdparty/superlu/SuperLU_4.1/SRC/dgsitrf.c.bak
+++ /dev/null
@@ -1,626 +0,0 @@
-
-/*! @file dgsitf.c
- * \brief Computes an ILU factorization of a general sparse matrix
- *
- * <pre>
- * -- SuperLU routine (version 4.0) --
- * Lawrence Berkeley National Laboratory.
- * June 30, 2009
- * </pre>
- */
-
-#include "slu_ddefs.h"
-
-#ifdef DEBUG
-int num_drop_L;
-#endif
-
-/*! \brief
- *
- * <pre>
- * Purpose
- * =======
- *
- * DGSITRF computes an ILU factorization of a general sparse m-by-n
- * matrix A using partial pivoting with row interchanges.
- * The factorization has the form
- *     Pr * A = L * U
- * where Pr is a row permutation matrix, L is lower triangular with unit
- * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper
- * triangular (upper trapezoidal if A->nrow < A->ncol).
- *
- * See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * options (input) superlu_options_t*
- *	   The structure defines the input parameters to control
- *	   how the ILU decomposition will be performed.
- *
- * A	    (input) SuperMatrix*
- *	    Original matrix A, permuted by columns, of dimension
- *	    (A->nrow, A->ncol). The type of A can be:
- *	    Stype = SLU_NCP; Dtype = SLU_D; Mtype = SLU_GE.
- *
- * relax    (input) int
- *	    To control degree of relaxing supernodes. If the number
- *	    of nodes (columns) in a subtree of the elimination tree is less
- *	    than relax, this subtree is considered as one supernode,
- *	    regardless of the row structures of those columns.
- *
- * panel_size (input) int
- *	    A panel consists of at most panel_size consecutive columns.
- *
- * etree    (input) int*, dimension (A->ncol)
- *	    Elimination tree of A'*A.
- *	    Note: etree is a vector of parent pointers for a forest whose
- *	    vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
- *	    On input, the columns of A should be permuted so that the
- *	    etree is in a certain postorder.
- *
- * work     (input/output) void*, size (lwork) (in bytes)
- *	    User-supplied work space and space for the output data structures.
- *	    Not referenced if lwork = 0;
- *
- * lwork   (input) int
- *	   Specifies the size of work array in bytes.
- *	   = 0:  allocate space internally by system malloc;
- *	   > 0:  use user-supplied work array of length lwork in bytes,
- *		 returns error if space runs out.
- *	   = -1: the routine guesses the amount of space needed without
- *		 performing the factorization, and returns it in
- *		 *info; no other side effects.
- *
- * perm_c   (input) int*, dimension (A->ncol)
- *	    Column permutation vector, which defines the
- *	    permutation matrix Pc; perm_c[i] = j means column i of A is
- *	    in position j in A*Pc.
- *	    When searching for diagonal, perm_c[*] is applied to the
- *	    row subscripts of A, so that diagonal threshold pivoting
- *	    can find the diagonal of A, rather than that of A*Pc.
- *
- * perm_r   (input/output) int*, dimension (A->nrow)
- *	    Row permutation vector which defines the permutation matrix Pr,
- *	    perm_r[i] = j means row i of A is in position j in Pr*A.
- *	    If options->Fact = SamePattern_SameRowPerm, the pivoting routine
- *	       will try to use the input perm_r, unless a certain threshold
- *	       criterion is violated. In that case, perm_r is overwritten by
- *	       a new permutation determined by partial pivoting or diagonal
- *	       threshold pivoting.
- *	    Otherwise, perm_r is output argument;
- *
- * L	    (output) SuperMatrix*
- *	    The factor L from the factorization Pr*A=L*U; use compressed row
- *	    subscripts storage for supernodes, i.e., L has type:
- *	    Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.
- *
- * U	    (output) SuperMatrix*
- *	    The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
- *	    storage scheme, i.e., U has types: Stype = SLU_NC,
- *	    Dtype = SLU_D, Mtype = SLU_TRU.
- *
- * stat     (output) SuperLUStat_t*
- *	    Record the statistics on runtime and floating-point operation count.
- *	    See slu_util.h for the definition of 'SuperLUStat_t'.
- *
- * info     (output) int*
- *	    = 0: successful exit
- *	    < 0: if info = -i, the i-th argument had an illegal value
- *	    > 0: if info = i, and i is
- *	       <= A->ncol: number of zero pivots. They are replaced by small
- *		  entries according to options->ILU_FillTol.
- *	       > A->ncol: number of bytes allocated when memory allocation
- *		  failure occurred, plus A->ncol. If lwork = -1, it is
- *		  the estimated amount of space needed, plus A->ncol.
- *
- * ======================================================================
- *
- * Local Working Arrays:
- * ======================
- *   m = number of rows in the matrix
- *   n = number of columns in the matrix
- *
- *   marker[0:3*m-1]: marker[i] = j means that node i has been
- *	reached when working on column j.
- *	Storage: relative to original row subscripts
- *	NOTE: There are 4 of them:
- *	      marker/marker1 are used for panel dfs, see (ilu_)dpanel_dfs.c;
- *	      marker2 is used for inner-factorization, see (ilu)_dcolumn_dfs.c;
- *	      marker_relax(has its own space) is used for relaxed supernodes.
- *
- *   parent[0:m-1]: parent vector used during dfs
- *	Storage: relative to new row subscripts
- *
- *   xplore[0:m-1]: xplore[i] gives the location of the next (dfs)
- *	unexplored neighbor of i in lsub[*]
- *
- *   segrep[0:nseg-1]: contains the list of supernodal representatives
- *	in topological order of the dfs. A supernode representative is the
- *	last column of a supernode.
- *	The maximum size of segrep[] is n.
- *
- *   repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a
- *	supernodal representative r, repfnz[r] is the location of the first
- *	nonzero in this segment.  It is also used during the dfs: repfnz[r]>0
- *	indicates the supernode r has been explored.
- *	NOTE: There are W of them, each used for one column of a panel.
- *
- *   panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below
- *	the panel diagonal. These are filled in during dpanel_dfs(), and are
- *	used later in the inner LU factorization within the panel.
- *	panel_lsub[]/dense[] pair forms the SPA data structure.
- *	NOTE: There are W of them.
- *
- *   dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
- *		   NOTE: there are W of them.
- *
- *   tempv[0:*]: real temporary used for dense numeric kernels;
- *	The size of this array is defined by NUM_TEMPV() in slu_util.h.
- *	It is also used by the dropping routine ilu_ddrop_row().
- * </pre>
- */
-
-void
-dgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
-	int *etree, void *work, int lwork, int *perm_c, int *perm_r,
-	SuperMatrix *L, SuperMatrix *U, SuperLUStat_t *stat, int *info)
-{
-    /* Local working arrays */
-    NCPformat *Astore;
-    int       *iperm_r = NULL; /* inverse of perm_r; used when
-				  options->Fact == SamePattern_SameRowPerm */
-    int       *iperm_c; /* inverse of perm_c */
-    int       *swap, *iswap; /* swap is used to store the row permutation
-				during the factorization. Initially, it is set
-				to iperm_c (row indeces of Pc*A*Pc').
-				iswap is the inverse of swap. After the
-				factorization, it is equal to perm_r. */
-    int       *iwork;
-    double   *dwork;
-    int       *segrep, *repfnz, *parent, *xplore;
-    int       *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
-    int       *marker, *marker_relax;
-    double    *dense, *tempv;
-    int       *relax_end, *relax_fsupc;
-    double    *a;
-    int       *asub;
-    int       *xa_begin, *xa_end;
-    int       *xsup, *supno;
-    int       *xlsub, *xlusup, *xusub;
-    int       nzlumax;
-    double    *amax; 
-    double    drop_sum;
-    static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */
-    int       *iwork2;	   /* used by the second dropping rule */
-
-    /* Local scalars */
-    fact_t    fact = options->Fact;
-    double    diag_pivot_thresh = options->DiagPivotThresh;
-    double    drop_tol = options->ILU_DropTol; /* tau */
-    double    fill_ini = options->ILU_FillTol; /* tau^hat */
-    double    gamma = options->ILU_FillFactor;
-    int       drop_rule = options->ILU_DropRule;
-    milu_t    milu = options->ILU_MILU;
-    double    fill_tol;
-    int       pivrow;	/* pivotal row number in the original matrix A */
-    int       nseg1;	/* no of segments in U-column above panel row jcol */
-    int       nseg;	/* no of segments in each U-column */
-    register int jcol;
-    register int kcol;	/* end column of a relaxed snode */
-    register int icol;
-    register int i, k, jj, new_next, iinfo;
-    int       m, n, min_mn, jsupno, fsupc, nextlu, nextu;
-    int       w_def;	/* upper bound on panel width */
-    int       usepr, iperm_r_allocated = 0;
-    int       nnzL, nnzU;
-    int       *panel_histo = stat->panel_histo;
-    flops_t   *ops = stat->ops;
-
-    int       last_drop;/* the last column which the dropping rules applied */
-    int       quota;
-    int       nnzAj;	/* number of nonzeros in A(:,1:j) */
-    int       nnzLj, nnzUj;
-    double    tol_L = drop_tol, tol_U = drop_tol;
-    double zero = 0.0;
-
-    /* Executable */	   
-    iinfo    = 0;
-    m	     = A->nrow;
-    n	     = A->ncol;
-    min_mn   = SUPERLU_MIN(m, n);
-    Astore   = A->Store;
-    a	     = Astore->nzval;
-    asub     = Astore->rowind;
-    xa_begin = Astore->colbeg;
-    xa_end   = Astore->colend;
-
-    /* Allocate storage common to the factor routines */
-    *info = dLUMemInit(fact, work, lwork, m, n, Astore->nnz, panel_size,
-		       gamma, L, U, &Glu, &iwork, &dwork);
-    if ( *info ) return;
-
-    xsup    = Glu.xsup;
-    supno   = Glu.supno;
-    xlsub   = Glu.xlsub;
-    xlusup  = Glu.xlusup;
-    xusub   = Glu.xusub;
-
-    SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
-	     &repfnz, &panel_lsub, &marker_relax, &marker);
-    dSetRWork(m, panel_size, dwork, &dense, &tempv);
-
-    usepr = (fact == SamePattern_SameRowPerm);
-    if ( usepr ) {
-	/* Compute the inverse of perm_r */
-	iperm_r = (int *) intMalloc(m);
-	for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
-	iperm_r_allocated = 1;
-    }
-
-    iperm_c = (int *) intMalloc(n);
-    for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
-    swap = (int *)intMalloc(n);
-    for (k = 0; k < n; k++) swap[k] = iperm_c[k];
-    iswap = (int *)intMalloc(n);
-    for (k = 0; k < n; k++) iswap[k] = perm_c[k];
-    amax = (double *) doubleMalloc(panel_size);
-    if (drop_rule & DROP_SECONDARY)
-	iwork2 = (int *)intMalloc(n);
-    else
-	iwork2 = NULL;
-
-    nnzAj = 0;
-    nnzLj = 0;
-    nnzUj = 0;
-    last_drop = SUPERLU_MAX(min_mn - 2 * sp_ienv(7), (int)(min_mn * 0.95));
-
-    /* Identify relaxed snodes */
-    relax_end = (int *) intMalloc(n);
-    relax_fsupc = (int *) intMalloc(n);
-    if ( options->SymmetricMode == YES )
-	ilu_heap_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc);
-    else
-	ilu_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc);
-
-    ifill (perm_r, m, EMPTY);
-    ifill (marker, m * NO_MARKER, EMPTY);
-    supno[0] = -1;
-    xsup[0]  = xlsub[0] = xusub[0] = xlusup[0] = 0;
-    w_def    = panel_size;
-
-    /* Mark the rows used by relaxed supernodes */
-    ifill (marker_relax, m, EMPTY);
-    i = mark_relax(m, relax_end, relax_fsupc, xa_begin, xa_end,
-	         asub, marker_relax);
-#if ( PRNTlevel >= 1)
-    printf("%d relaxed supernodes.\n", i);
-#endif
-
-    /*
-     * Work on one "panel" at a time. A panel is one of the following:
-     *	   (a) a relaxed supernode at the bottom of the etree, or
-     *	   (b) panel_size contiguous columns, defined by the user
-     */
-    for (jcol = 0; jcol < min_mn; ) {
-
-	if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
-	    kcol = relax_end[jcol];	  /* end of the relaxed snode */
-	    panel_histo[kcol-jcol+1]++;
-
-	    /* Drop small rows in the previous supernode. */
-	    if (jcol > 0 && jcol < last_drop) {
-		int first = xsup[supno[jcol - 1]];
-		int last = jcol - 1;
-		int quota;
-
-		/* Compute the quota */
-		if (drop_rule & DROP_PROWS)
-		    quota = gamma * Astore->nnz / m * (m - first) / m
-			    * (last - first + 1);
-		else if (drop_rule & DROP_COLUMN) {
-		    int i;
-		    quota = 0;
-		    for (i = first; i <= last; i++)
-			quota += xa_end[i] - xa_begin[i];
-		    quota = gamma * quota * (m - first) / m;
-		} else if (drop_rule & DROP_AREA)
-		    quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
-			    - nnzLj;
-		else
-		    quota = m * n;
-		fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / min_mn);
-
-		/* Drop small rows */
-		i = ilu_ddrop_row(options, first, last, tol_L, quota, &nnzLj,
-				  &fill_tol, &Glu, tempv, iwork2, 0);
-		/* Reset the parameters */
-		if (drop_rule & DROP_DYNAMIC) {
-		    if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
-			     < nnzLj)
-			tol_L = SUPERLU_MIN(1.0, tol_L * 2.0);
-		    else
-			tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5);
-		}
-		if (fill_tol < 0) iinfo -= (int)fill_tol;
-#ifdef DEBUG
-		num_drop_L += i * (last - first + 1);
-#endif
-	    }
-
-	    /* --------------------------------------
-	     * Factorize the relaxed supernode(jcol:kcol)
-	     * -------------------------------------- */
-	    /* Determine the union of the row structure of the snode */
-	    if ( (*info = ilu_dsnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
-					 marker, &Glu)) != 0 )
-		return;
-
-	    nextu    = xusub[jcol];
-	    nextlu   = xlusup[jcol];
-	    jsupno   = supno[jcol];
-	    fsupc    = xsup[jsupno];
-	    new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
-	    nzlumax = Glu.nzlumax;
-	    while ( new_next > nzlumax ) {
-		if ((*info = dLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu)))
-		    return;
-	    }
-
-	    for (icol = jcol; icol <= kcol; icol++) {
-		xusub[icol+1] = nextu;
-
-		amax[0] = 0.0;
-		/* Scatter into SPA dense[*] */
-		for (k = xa_begin[icol]; k < xa_end[icol]; k++) {
-		    register double tmp = fabs(a[k]);
-		    if (tmp > amax[0]) amax[0] = tmp;
-		    dense[asub[k]] = a[k];
-		}
-		nnzAj += xa_end[icol] - xa_begin[icol];
-		if (amax[0] == 0.0) {
-		    amax[0] = fill_ini;
-#if ( PRNTlevel >= 1)
-		    printf("Column %d is entirely zero!\n", icol);
-		    fflush(stdout);
-#endif
-		}
-
-		/* Numeric update within the snode */
-		dsnode_bmod(icol, jsupno, fsupc, dense, tempv, &Glu, stat);
-
-		if (usepr) pivrow = iperm_r[icol];
-		fill_tol = pow(fill_ini, 1.0 - (double)icol / (double)min_mn);
-		if ( (*info = ilu_dpivotL(icol, diag_pivot_thresh, &usepr,
-					  perm_r, iperm_c[icol], swap, iswap,
-					  marker_relax, &pivrow,
-                                          amax[0] * fill_tol, milu, zero,
-                                          &Glu, stat)) ) {
-		    iinfo++;
-		    marker[pivrow] = kcol;
-		}
-
-	    }
-
-	    jcol = kcol + 1;
-
-	} else { /* Work on one panel of panel_size columns */
-
-	    /* Adjust panel_size so that a panel won't overlap with the next
-	     * relaxed snode.
-	     */
-	    panel_size = w_def;
-	    for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++)
-		if ( relax_end[k] != EMPTY ) {
-		    panel_size = k - jcol;
-		    break;
-		}
-	    if ( k == min_mn ) panel_size = min_mn - jcol;
-	    panel_histo[panel_size]++;
-
-	    /* symbolic factor on a panel of columns */
-	    ilu_dpanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
-                          dense, amax, panel_lsub, segrep, repfnz,
-                          marker, parent, xplore, &Glu);
-
-	    /* numeric sup-panel updates in topological order */
-	    dpanel_bmod(m, panel_size, jcol, nseg1, dense,
-			tempv, segrep, repfnz, &Glu, stat);
-
-	    /* Sparse LU within the panel, and below panel diagonal */
-	    for (jj = jcol; jj < jcol + panel_size; jj++) {
-
-		k = (jj - jcol) * m; /* column index for w-wide arrays */
-
-		nseg = nseg1;	/* Begin after all the panel segments */
-
-		nnzAj += xa_end[jj] - xa_begin[jj];
-
-		if ((*info = ilu_dcolumn_dfs(m, jj, perm_r, &nseg,
-					     &panel_lsub[k], segrep, &repfnz[k],
-					     marker, parent, xplore, &Glu)))
-		    return;
-
-		/* Numeric updates */
-		if ((*info = dcolumn_bmod(jj, (nseg - nseg1), &dense[k],
-					  tempv, &segrep[nseg1], &repfnz[k],
-					  jcol, &Glu, stat)) != 0) return;
-
-		/* Make a fill-in position if the column is entirely zero */
-		if (xlsub[jj + 1] == xlsub[jj]) {
-		    register int i, row;
-		    int nextl;
-		    int nzlmax = Glu.nzlmax;
-		    int *lsub = Glu.lsub;
-		    int *marker2 = marker + 2 * m;
-
-		    /* Allocate memory */
-		    nextl = xlsub[jj] + 1;
-		    if (nextl >= nzlmax) {
-			int error = dLUMemXpand(jj, nextl, LSUB, &nzlmax, &Glu);
-			if (error) { *info = error; return; }
-			lsub = Glu.lsub;
-		    }
-		    xlsub[jj + 1]++;
-		    assert(xlusup[jj]==xlusup[jj+1]);
-		    xlusup[jj + 1]++;
-		    Glu.lusup[xlusup[jj]] = zero;
-
-		    /* Choose a row index (pivrow) for fill-in */
-		    for (i = jj; i < n; i++)
-			if (marker_relax[swap[i]] <= jj) break;
-		    row = swap[i];
-		    marker2[row] = jj;
-		    lsub[xlsub[jj]] = row;
-#ifdef DEBUG
-		    printf("Fill col %d.\n", jj);
-		    fflush(stdout);
-#endif
-		}
-
-		/* Computer the quota */
-		if (drop_rule & DROP_PROWS)
-		    quota = gamma * Astore->nnz / m * jj / m;
-		else if (drop_rule & DROP_COLUMN)
-		    quota = gamma * (xa_end[jj] - xa_begin[jj]) *
-			    (jj + 1) / m;
-		else if (drop_rule & DROP_AREA)
-		    quota = gamma * 0.9 * nnzAj * 0.5 - nnzUj;
-		else
-		    quota = m;
-
-		/* Copy the U-segments to ucol[*] and drop small entries */
-		if ((*info = ilu_dcopy_to_ucol(jj, nseg, segrep, &repfnz[k],
-					       perm_r, &dense[k], drop_rule,
-					       milu, amax[jj - jcol] * tol_U,
-					       quota, &drop_sum, &nnzUj, &Glu,
-					       iwork2)) != 0)
-		    return;
-
-		/* Reset the dropping threshold if required */
-		if (drop_rule & DROP_DYNAMIC) {
-		    if (gamma * 0.9 * nnzAj * 0.5 < nnzLj)
-			tol_U = SUPERLU_MIN(1.0, tol_U * 2.0);
-		    else
-			tol_U = SUPERLU_MAX(drop_tol, tol_U * 0.5);
-		}
-
-                drop_sum *= MILU_ALPHA;
-		if (usepr) pivrow = iperm_r[jj];
-		fill_tol = pow(fill_ini, 1.0 - (double)jj / (double)min_mn);
-		if ( (*info = ilu_dpivotL(jj, diag_pivot_thresh, &usepr, perm_r,
-					  iperm_c[jj], swap, iswap,
-					  marker_relax, &pivrow,
-					  amax[jj - jcol] * fill_tol, milu,
-					  drop_sum, &Glu, stat)) ) {
-		    iinfo++;
-		    marker[m + pivrow] = jj;
-		    marker[2 * m + pivrow] = jj;
-		}
-
-		/* Reset repfnz[] for this column */
-		resetrep_col (nseg, segrep, &repfnz[k]);
-
-		/* Start a new supernode, drop the previous one */
-		if (jj > 0 && supno[jj] > supno[jj - 1] && jj < last_drop) {
-		    int first = xsup[supno[jj - 1]];
-		    int last = jj - 1;
-		    int quota;
-
-		    /* Compute the quota */
-		    if (drop_rule & DROP_PROWS)
-			quota = gamma * Astore->nnz / m * (m - first) / m
-				* (last - first + 1);
-		    else if (drop_rule & DROP_COLUMN) {
-			int i;
-			quota = 0;
-			for (i = first; i <= last; i++)
-			    quota += xa_end[i] - xa_begin[i];
-			quota = gamma * quota * (m - first) / m;
-		    } else if (drop_rule & DROP_AREA)
-			quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0)
-				/ m) - nnzLj;
-		    else
-			quota = m * n;
-		    fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) /
-			    (double)min_mn);
-
-		    /* Drop small rows */
-		    i = ilu_ddrop_row(options, first, last, tol_L, quota,
-				      &nnzLj, &fill_tol, &Glu, tempv, iwork2,
-				      1);
-
-		    /* Reset the parameters */
-		    if (drop_rule & DROP_DYNAMIC) {
-			if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
-				< nnzLj)
-			    tol_L = SUPERLU_MIN(1.0, tol_L * 2.0);
-			else
-			    tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5);
-		    }
-		    if (fill_tol < 0) iinfo -= (int)fill_tol;
-#ifdef DEBUG
-		    num_drop_L += i * (last - first + 1);
-#endif
-		} /* if start a new supernode */
-
-	    } /* for */
-
-	    jcol += panel_size; /* Move to the next panel */
-
-	} /* else */
-
-    } /* for */
-
-    *info = iinfo;
-
-    if ( m > n ) {
-	k = 0;
-	for (i = 0; i < m; ++i)
-	    if ( perm_r[i] == EMPTY ) {
-		perm_r[i] = n + k;
-		++k;
-	    }
-    }
-
-    ilu_countnz(min_mn, &nnzL, &nnzU, &Glu);
-    fixupL(min_mn, perm_r, &Glu);
-
-    dLUWorkFree(iwork, dwork, &Glu); /* Free work space and compress storage */
-
-    if ( fact == SamePattern_SameRowPerm ) {
-	/* L and U structures may have changed due to possibly different
-	   pivoting, even though the storage is available.
-	   There could also be memory expansions, so the array locations
-	   may have changed, */
-	((SCformat *)L->Store)->nnz = nnzL;
-	((SCformat *)L->Store)->nsuper = Glu.supno[n];
-	((SCformat *)L->Store)->nzval = Glu.lusup;
-	((SCformat *)L->Store)->nzval_colptr = Glu.xlusup;
-	((SCformat *)L->Store)->rowind = Glu.lsub;
-	((SCformat *)L->Store)->rowind_colptr = Glu.xlsub;
-	((NCformat *)U->Store)->nnz = nnzU;
-	((NCformat *)U->Store)->nzval = Glu.ucol;
-	((NCformat *)U->Store)->rowind = Glu.usub;
-	((NCformat *)U->Store)->colptr = Glu.xusub;
-    } else {
-	dCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup,
-				 Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno,
-				 Glu.xsup, SLU_SC, SLU_D, SLU_TRLU);
-	dCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol,
-			       Glu.usub, Glu.xusub, SLU_NC, SLU_D, SLU_TRU);
-    }
-
-    ops[FACT] += ops[TRSV] + ops[GEMV];
-    stat->expansions = --(Glu.num_expansions);
-
-    if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
-    SUPERLU_FREE (iperm_c);
-    SUPERLU_FREE (relax_end);
-    SUPERLU_FREE (swap);
-    SUPERLU_FREE (iswap);
-    SUPERLU_FREE (relax_fsupc);
-    SUPERLU_FREE (amax);
-    if ( iwork2 ) SUPERLU_FREE (iwork2);
-
-}
diff --git a/thirdparty/superlu/SuperLU_4.1/SRC/dlacon.c.bak b/thirdparty/superlu/SuperLU_4.1/SRC/dlacon.c.bak
deleted file mode 100644
index 951fe7a..0000000
--- a/thirdparty/superlu/SuperLU_4.1/SRC/dlacon.c.bak
+++ /dev/null
@@ -1,236 +0,0 @@
-
-/*! @file dlacon.c
- * \brief Estimates the 1-norm
- *
- * <pre>
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- * </pre>
- */
-#include <math.h>
-#include "slu_Cnames.h"
-
-/*! \brief
- *
- * <pre>
- *   Purpose   
- *   =======   
- *
- *   DLACON estimates the 1-norm of a square matrix A.   
- *   Reverse communication is used for evaluating matrix-vector products. 
- * 
- *
- *   Arguments   
- *   =========   
- *
- *   N      (input) INT
- *          The order of the matrix.  N >= 1.   
- *
- *   V      (workspace) DOUBLE PRECISION array, dimension (N)   
- *          On the final return, V = A*W,  where  EST = norm(V)/norm(W)   
- *          (W is not returned).   
- *
- *   X      (input/output) DOUBLE PRECISION array, dimension (N)   
- *          On an intermediate return, X should be overwritten by   
- *                A * X,   if KASE=1,   
- *                A' * X,  if KASE=2,
- *         and DLACON must be re-called with all the other parameters   
- *          unchanged.   
- *
- *   ISGN   (workspace) INT array, dimension (N)
- *
- *   EST    (output) DOUBLE PRECISION   
- *          An estimate (a lower bound) for norm(A).   
- *
- *   KASE   (input/output) INT
- *          On the initial call to DLACON, KASE should be 0.   
- *          On an intermediate return, KASE will be 1 or 2, indicating   
- *          whether X should be overwritten by A * X  or A' * X.   
- *          On the final return from DLACON, KASE will again be 0.   
- *
- *   Further Details   
- *   ======= =======   
- *
- *   Contributed by Nick Higham, University of Manchester.   
- *   Originally named CONEST, dated March 16, 1988.   
- *
- *   Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of 
- *   a real or complex matrix, with applications to condition estimation", 
- *   ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988.   
- *   ===================================================================== 
- * </pre>
- */
-
-int
-dlacon_(int *n, double *v, double *x, int *isgn, double *est, int *kase)
-
-{
-
-
-    /* Table of constant values */
-    int c__1 = 1;
-    double      zero = 0.0;
-    double      one = 1.0;
-    
-    /* Local variables */
-    static int iter;
-    static int jump, jlast;
-    static double altsgn, estold;
-    static int i, j;
-    double temp;
-#ifdef _CRAY
-    extern int ISAMAX(int *, double *, int *);
-    extern double SASUM(int *, double *, int *);
-    extern int SCOPY(int *, double *, int *, double *, int *);
-#else
-    extern int idamax_(int *, double *, int *);
-    extern double dasum_(int *, double *, int *);
-    extern int dcopy_(int *, double *, int *, double *, int *);
-#endif
-#define d_sign(a, b) (b >= 0 ? fabs(a) : -fabs(a))    /* Copy sign */
-#define i_dnnt(a) \
-	( a>=0 ? floor(a+.5) : -floor(.5-a) ) /* Round to nearest integer */
-
-    if ( *kase == 0 ) {
-	for (i = 0; i < *n; ++i) {
-	    x[i] = 1. / (double) (*n);
-	}
-	*kase = 1;
-	jump = 1;
-	return 0;
-    }
-
-    switch (jump) {
-	case 1:  goto L20;
-	case 2:  goto L40;
-	case 3:  goto L70;
-	case 4:  goto L110;
-	case 5:  goto L140;
-    }
-
-    /*     ................ ENTRY   (JUMP = 1)   
-	   FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY A*X. */
-  L20:
-    if (*n == 1) {
-	v[0] = x[0];
-	*est = fabs(v[0]);
-	/*        ... QUIT */
-	goto L150;
-    }
-#ifdef _CRAY
-    *est = SASUM(n, x, &c__1);
-#else
-    *est = dasum_(n, x, &c__1);
-#endif
-
-    for (i = 0; i < *n; ++i) {
-	x[i] = d_sign(one, x[i]);
-	isgn[i] = i_dnnt(x[i]);
-    }
-    *kase = 2;
-    jump = 2;
-    return 0;
-
-    /*     ................ ENTRY   (JUMP = 2)   
-	   FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. */
-L40:
-#ifdef _CRAY
-    j = ISAMAX(n, &x[0], &c__1);
-#else
-    j = idamax_(n, &x[0], &c__1);
-#endif
-    --j;
-    iter = 2;
-
-    /*     MAIN LOOP - ITERATIONS 2,3,...,ITMAX. */
-L50:
-    for (i = 0; i < *n; ++i) x[i] = zero;
-    x[j] = one;
-    *kase = 1;
-    jump = 3;
-    return 0;
-
-    /*     ................ ENTRY   (JUMP = 3)   
-	   X HAS BEEN OVERWRITTEN BY A*X. */
-L70:
-#ifdef _CRAY
-    SCOPY(n, x, &c__1, v, &c__1);
-#else
-    dcopy_(n, x, &c__1, v, &c__1);
-#endif
-    estold = *est;
-#ifdef _CRAY
-    *est = SASUM(n, v, &c__1);
-#else
-    *est = dasum_(n, v, &c__1);
-#endif
-
-    for (i = 0; i < *n; ++i)
-	if (i_dnnt(d_sign(one, x[i])) != isgn[i])
-	    goto L90;
-
-    /*     REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED. */
-    goto L120;
-
-L90:
-    /*     TEST FOR CYCLING. */
-    if (*est <= estold) goto L120;
-
-    for (i = 0; i < *n; ++i) {
-	x[i] = d_sign(one, x[i]);
-	isgn[i] = i_dnnt(x[i]);
-    }
-    *kase = 2;
-    jump = 4;
-    return 0;
-
-    /*     ................ ENTRY   (JUMP = 4)   
-	   X HAS BEEN OVERWRITTEN BY TRANDPOSE(A)*X. */
-L110:
-    jlast = j;
-#ifdef _CRAY
-    j = ISAMAX(n, &x[0], &c__1);
-#else
-    j = idamax_(n, &x[0], &c__1);
-#endif
-    --j;
-    if (x[jlast] != fabs(x[j]) && iter < 5) {
-	++iter;
-	goto L50;
-    }
-
-    /*     ITERATION COMPLETE.  FINAL STAGE. */
-L120:
-    altsgn = 1.;
-    for (i = 1; i <= *n; ++i) {
-	x[i-1] = altsgn * ((double)(i - 1) / (double)(*n - 1) + 1.);
-	altsgn = -altsgn;
-    }
-    *kase = 1;
-    jump = 5;
-    return 0;
-    
-    /*     ................ ENTRY   (JUMP = 5)   
-	   X HAS BEEN OVERWRITTEN BY A*X. */
-L140:
-#ifdef _CRAY
-    temp = SASUM(n, x, &c__1) / (double)(*n * 3) * 2.;
-#else
-    temp = dasum_(n, x, &c__1) / (double)(*n * 3) * 2.;
-#endif
-    if (temp > *est) {
-#ifdef _CRAY
-	SCOPY(n, &x[0], &c__1, &v[0], &c__1);
-#else
-	dcopy_(n, &x[0], &c__1, &v[0], &c__1);
-#endif
-	*est = temp;
-    }
-
-L150:
-    *kase = 0;
-    return 0;
-
-} /* dlacon_ */
diff --git a/thirdparty/superlu/SuperLU_4.1/SRC/ilu_ccopy_to_ucol.c.bak b/thirdparty/superlu/SuperLU_4.1/SRC/ilu_ccopy_to_ucol.c.bak
deleted file mode 100644
index ba32096..0000000
--- a/thirdparty/superlu/SuperLU_4.1/SRC/ilu_ccopy_to_ucol.c.bak
+++ /dev/null
@@ -1,202 +0,0 @@
-
-/*! @file ilu_ccopy_to_ucol.c
- * \brief Copy a computed column of U to the compressed data structure
- * and drop some small entries
- *
- * <pre>
- * -- SuperLU routine (version 4.0) --
- * Lawrence Berkeley National Laboratory
- * June 30, 2009
- * </pre>
- */
-
-#include "slu_cdefs.h"
-
-#ifdef DEBUG
-int num_drop_U;
-#endif
-
-static complex *A;  /* used in _compare_ only */
-static int _compare_(const void *a, const void *b)
-{
-    register int *x = (int *)a, *y = (int *)b;
-    register float xx = c_abs1(&A[*x]), yy = c_abs1(&A[*y]);
-    if (xx > yy) return -1;
-    else if (xx < yy) return 1;
-    else return 0;
-}
-
-
-int
-ilu_ccopy_to_ucol(
-	      int	 jcol,	   /* in */
-	      int	 nseg,	   /* in */
-	      int	 *segrep,  /* in */
-	      int	 *repfnz,  /* in */
-	      int	 *perm_r,  /* in */
-	      complex	 *dense,   /* modified - reset to zero on return */
-	      int  	 drop_rule,/* in */
-	      milu_t	 milu,	   /* in */
-	      double	 drop_tol, /* in */
-	      int	 quota,    /* maximum nonzero entries allowed */
-	      complex	 *sum,	   /* out - the sum of dropped entries */
-	      int	 *nnzUj,   /* in - out */
-	      GlobalLU_t *Glu,	   /* modified */
-	      int	 *work	   /* working space with minimum size n,
-				    * used by the second dropping rule */
-	      )
-{
-/*
- * Gather from SPA dense[*] to global ucol[*].
- */
-    int       ksub, krep, ksupno;
-    int       i, k, kfnz, segsze;
-    int       fsupc, isub, irow;
-    int       jsupno, nextu;
-    int       new_next, mem_error;
-    int       *xsup, *supno;
-    int       *lsub, *xlsub;
-    complex    *ucol;
-    int       *usub, *xusub;
-    int       nzumax;
-    int       m; /* number of entries in the nonzero U-segments */
-    register float d_max = 0.0, d_min = 1.0 / dlamch_("Safe minimum");
-    register double tmp;
-    complex zero = {0.0, 0.0};
-
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    ucol    = Glu->ucol;
-    usub    = Glu->usub;
-    xusub   = Glu->xusub;
-    nzumax  = Glu->nzumax;
-
-    *sum = zero;
-    if (drop_rule == NODROP) {
-	drop_tol = -1.0, quota = Glu->n;
-    }
-
-    jsupno = supno[jcol];
-    nextu  = xusub[jcol];
-    k = nseg - 1;
-    for (ksub = 0; ksub < nseg; ksub++) {
-	krep = segrep[k--];
-	ksupno = supno[krep];
-
-	if ( ksupno != jsupno ) { /* Should go into ucol[] */
-	    kfnz = repfnz[krep];
-	    if ( kfnz != EMPTY ) {	/* Nonzero U-segment */
-
-		fsupc = xsup[ksupno];
-		isub = xlsub[fsupc] + kfnz - fsupc;
-		segsze = krep - kfnz + 1;
-
-		new_next = nextu + segsze;
-		while ( new_next > nzumax ) {
-		    if ((mem_error = cLUMemXpand(jcol, nextu, UCOL, &nzumax,
-			    Glu)) != 0)
-			return (mem_error);
-		    ucol = Glu->ucol;
-		    if ((mem_error = cLUMemXpand(jcol, nextu, USUB, &nzumax,
-			    Glu)) != 0)
-			return (mem_error);
-		    usub = Glu->usub;
-		    lsub = Glu->lsub;
-		}
-
-		for (i = 0; i < segsze; i++) {
-		    irow = lsub[isub++];
-         	    tmp = c_abs1(&dense[irow]);
-
-		    /* first dropping rule */
-		    if (quota > 0 && tmp >= drop_tol) {
-			if (tmp > d_max) d_max = tmp;
-			if (tmp < d_min) d_min = tmp;
-			usub[nextu] = perm_r[irow];
-			ucol[nextu] = dense[irow];
-			nextu++;
-		    } else {
-			switch (milu) {
-			    case SMILU_1:
-			    case SMILU_2:
-                                c_add(sum, sum, &dense[irow]);
-				break;
-			    case SMILU_3:
-				/* *sum += fabs(dense[irow]);*/
-				sum->r += tmp;
-				break;
-			    case SILU:
-			    default:
-				break;
-			}
-#ifdef DEBUG
-			num_drop_U++;
-#endif
-		    }
-		    dense[irow] = zero;
-		}
-
-	    }
-
-	}
-
-    } /* for each segment... */
-
-    xusub[jcol + 1] = nextu;	  /* Close U[*,jcol] */
-    m = xusub[jcol + 1] - xusub[jcol];
-
-    /* second dropping rule */
-    if (drop_rule & DROP_SECONDARY && m > quota) {
-	register double tol = d_max;
-	register int m0 = xusub[jcol] + m - 1;
-
-	if (quota > 0) {
-	    if (drop_rule & DROP_INTERP) {
-		d_max = 1.0 / d_max; d_min = 1.0 / d_min;
-		tol = 1.0 / (d_max + (d_min - d_max) * quota / m);
-	    } else {
-		A = &ucol[xusub[jcol]];
-		for (i = 0; i < m; i++) work[i] = i;
-		qsort(work, m, sizeof(int), _compare_);
-		tol = fabs(usub[xusub[jcol] + work[quota]]);
-	    }
-	}
-	for (i = xusub[jcol]; i <= m0; ) {
-	    if (c_abs1(&ucol[i]) <= tol) {
-		switch (milu) {
-		    case SMILU_1:
-		    case SMILU_2:
-			c_add(sum, sum, &ucol[i]);
-			break;
-		    case SMILU_3:
-			sum->r += tmp;
-			break;
-		    case SILU:
-		    default:
-			break;
-		}
-		ucol[i] = ucol[m0];
-		usub[i] = usub[m0];
-		m0--;
-		m--;
-#ifdef DEBUG
-		num_drop_U++;
-#endif
-		xusub[jcol + 1]--;
-		continue;
-	    }
-	    i++;
-	}
-    }
-
-    if (milu == SMILU_2) {
-        sum->r = c_abs1(sum); sum->i = 0.0;
-    }
-    if (milu == SMILU_3) sum->i = 0.0;
-
-    *nnzUj += m;
-
-    return 0;
-}
diff --git a/thirdparty/superlu/SuperLU_4.1/SRC/ilu_dcopy_to_ucol.c.bak b/thirdparty/superlu/SuperLU_4.1/SRC/ilu_dcopy_to_ucol.c.bak
deleted file mode 100644
index a27a148..0000000
--- a/thirdparty/superlu/SuperLU_4.1/SRC/ilu_dcopy_to_ucol.c.bak
+++ /dev/null
@@ -1,199 +0,0 @@
-
-/*! @file ilu_dcopy_to_ucol.c
- * \brief Copy a computed column of U to the compressed data structure
- * and drop some small entries
- *
- * <pre>
- * -- SuperLU routine (version 4.0) --
- * Lawrence Berkeley National Laboratory
- * June 30, 2009
- * </pre>
- */
-
-#include "slu_ddefs.h"
-
-#ifdef DEBUG
-int num_drop_U;
-#endif
-
-static double *A;  /* used in _compare_ only */
-static int _compare_(const void *a, const void *b)
-{
-    register int *x = (int *)a, *y = (int *)b;
-    register double xx = fabs(A[*x]), yy = fabs(A[*y]);
-    if (xx > yy) return -1;
-    else if (xx < yy) return 1;
-    else return 0;
-}
-
-
-int
-ilu_dcopy_to_ucol(
-	      int	 jcol,	   /* in */
-	      int	 nseg,	   /* in */
-	      int	 *segrep,  /* in */
-	      int	 *repfnz,  /* in */
-	      int	 *perm_r,  /* in */
-	      double	 *dense,   /* modified - reset to zero on return */
-	      int  	 drop_rule,/* in */
-	      milu_t	 milu,	   /* in */
-	      double	 drop_tol, /* in */
-	      int	 quota,    /* maximum nonzero entries allowed */
-	      double	 *sum,	   /* out - the sum of dropped entries */
-	      int	 *nnzUj,   /* in - out */
-	      GlobalLU_t *Glu,	   /* modified */
-	      int	 *work	   /* working space with minimum size n,
-				    * used by the second dropping rule */
-	      )
-{
-/*
- * Gather from SPA dense[*] to global ucol[*].
- */
-    int       ksub, krep, ksupno;
-    int       i, k, kfnz, segsze;
-    int       fsupc, isub, irow;
-    int       jsupno, nextu;
-    int       new_next, mem_error;
-    int       *xsup, *supno;
-    int       *lsub, *xlsub;
-    double    *ucol;
-    int       *usub, *xusub;
-    int       nzumax;
-    int       m; /* number of entries in the nonzero U-segments */
-    register double d_max = 0.0, d_min = 1.0 / dlamch_("Safe minimum");
-    register double tmp;
-    double zero = 0.0;
-
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    ucol    = Glu->ucol;
-    usub    = Glu->usub;
-    xusub   = Glu->xusub;
-    nzumax  = Glu->nzumax;
-
-    *sum = zero;
-    if (drop_rule == NODROP) {
-	drop_tol = -1.0, quota = Glu->n;
-    }
-
-    jsupno = supno[jcol];
-    nextu  = xusub[jcol];
-    k = nseg - 1;
-    for (ksub = 0; ksub < nseg; ksub++) {
-	krep = segrep[k--];
-	ksupno = supno[krep];
-
-	if ( ksupno != jsupno ) { /* Should go into ucol[] */
-	    kfnz = repfnz[krep];
-	    if ( kfnz != EMPTY ) {	/* Nonzero U-segment */
-
-		fsupc = xsup[ksupno];
-		isub = xlsub[fsupc] + kfnz - fsupc;
-		segsze = krep - kfnz + 1;
-
-		new_next = nextu + segsze;
-		while ( new_next > nzumax ) {
-		    if ((mem_error = dLUMemXpand(jcol, nextu, UCOL, &nzumax,
-			    Glu)) != 0)
-			return (mem_error);
-		    ucol = Glu->ucol;
-		    if ((mem_error = dLUMemXpand(jcol, nextu, USUB, &nzumax,
-			    Glu)) != 0)
-			return (mem_error);
-		    usub = Glu->usub;
-		    lsub = Glu->lsub;
-		}
-
-		for (i = 0; i < segsze; i++) {
-		    irow = lsub[isub++];
-		    tmp = fabs(dense[irow]);
-
-		    /* first dropping rule */
-		    if (quota > 0 && tmp >= drop_tol) {
-			if (tmp > d_max) d_max = tmp;
-			if (tmp < d_min) d_min = tmp;
-			usub[nextu] = perm_r[irow];
-			ucol[nextu] = dense[irow];
-			nextu++;
-		    } else {
-			switch (milu) {
-			    case SMILU_1:
-			    case SMILU_2:
-				*sum += dense[irow];
-				break;
-			    case SMILU_3:
-				/* *sum += fabs(dense[irow]);*/
-				*sum += tmp;
-				break;
-			    case SILU:
-			    default:
-				break;
-			}
-#ifdef DEBUG
-			num_drop_U++;
-#endif
-		    }
-		    dense[irow] = zero;
-		}
-
-	    }
-
-	}
-
-    } /* for each segment... */
-
-    xusub[jcol + 1] = nextu;	  /* Close U[*,jcol] */
-    m = xusub[jcol + 1] - xusub[jcol];
-
-    /* second dropping rule */
-    if (drop_rule & DROP_SECONDARY && m > quota) {
-	register double tol = d_max;
-	register int m0 = xusub[jcol] + m - 1;
-
-	if (quota > 0) {
-	    if (drop_rule & DROP_INTERP) {
-		d_max = 1.0 / d_max; d_min = 1.0 / d_min;
-		tol = 1.0 / (d_max + (d_min - d_max) * quota / m);
-	    } else {
-		A = &ucol[xusub[jcol]];
-		for (i = 0; i < m; i++) work[i] = i;
-		qsort(work, m, sizeof(int), _compare_);
-		tol = fabs(usub[xusub[jcol] + work[quota]]);
-	    }
-	}
-	for (i = xusub[jcol]; i <= m0; ) {
-	    if (fabs(ucol[i]) <= tol) {
-		switch (milu) {
-		    case SMILU_1:
-		    case SMILU_2:
-			*sum += ucol[i];
-			break;
-		    case SMILU_3:
-			*sum += fabs(ucol[i]);
-			break;
-		    case SILU:
-		    default:
-			break;
-		}
-		ucol[i] = ucol[m0];
-		usub[i] = usub[m0];
-		m0--;
-		m--;
-#ifdef DEBUG
-		num_drop_U++;
-#endif
-		xusub[jcol + 1]--;
-		continue;
-	    }
-	    i++;
-	}
-    }
-
-    if (milu == SMILU_2) *sum = fabs(*sum);
-
-    *nnzUj += m;
-
-    return 0;
-}
diff --git a/thirdparty/superlu/SuperLU_4.1/SRC/ilu_ddrop_row.c.bak b/thirdparty/superlu/SuperLU_4.1/SRC/ilu_ddrop_row.c.bak
deleted file mode 100644
index caffa13..0000000
--- a/thirdparty/superlu/SuperLU_4.1/SRC/ilu_ddrop_row.c.bak
+++ /dev/null
@@ -1,311 +0,0 @@
-
-/*! @file ilu_ddrop_row.c
- * \brief Drop small rows from L
- *
- * <pre>
- * -- SuperLU routine (version 4.0) --
- * Lawrence Berkeley National Laboratory.
- * June 30, 2009
- * <\pre>
- */
-
-#include <math.h>
-#include <stdlib.h>
-#include "slu_ddefs.h"
-
-extern void dswap_(int *, double [], int *, double [], int *);
-extern void daxpy_(int *, double *, double [], int *, double [], int *);
-
-static double *A;  /* used in _compare_ only */
-static int _compare_(const void *a, const void *b)
-{
-    register int *x = (int *)a, *y = (int *)b;
-    if (A[*x] - A[*y] > 0.0) return -1;
-    else if (A[*x] - A[*y] < 0.0) return 1;
-    else return 0;
-}
-
-/*! \brief
- * <pre>
- * Purpose
- * =======
- *    ilu_ddrop_row() - Drop some small rows from the previous 
- *    supernode (L-part only).
- * </pre>
- */
-int ilu_ddrop_row(
-	superlu_options_t *options, /* options */
-	int    first,	    /* index of the first column in the supernode */
-	int    last,	    /* index of the last column in the supernode */
-	double drop_tol,    /* dropping parameter */
-	int    quota,	    /* maximum nonzero entries allowed */
-	int    *nnzLj,	    /* in/out number of nonzeros in L(:, 1:last) */
-	double *fill_tol,   /* in/out - on exit, fill_tol=-num_zero_pivots,
-			     * does not change if options->ILU_MILU != SMILU1 */
-	GlobalLU_t *Glu,    /* modified */
-	double dwork[],     /* working space with minimum size last-first+1 */
-	int    iwork[],     /* working space with minimum size m - n,
-			     * used by the second dropping rule */
-	int    lastc	    /* if lastc == 0, there is nothing after the
-			     * working supernode [first:last];
-			     * if lastc == 1, there is one more column after
-			     * the working supernode. */ )
-{
-    register int i, j, k, m1;
-    register int nzlc; /* number of nonzeros in column last+1 */
-    register int xlusup_first, xlsub_first;
-    int m, n; /* m x n is the size of the supernode */
-    int r = 0; /* number of dropped rows */
-    register double *temp;
-    register double *lusup = Glu->lusup;
-    register int *lsub = Glu->lsub;
-    register int *xlsub = Glu->xlsub;
-    register int *xlusup = Glu->xlusup;
-    register double d_max = 0.0, d_min = 1.0;
-    int    drop_rule = options->ILU_DropRule;
-    milu_t milu = options->ILU_MILU;
-    norm_t nrm = options->ILU_Norm;
-    double zero = 0.0;
-    double one = 1.0;
-    double none = -1.0;
-    int inc_diag; /* inc_diag = m + 1 */
-    int nzp = 0;  /* number of zero pivots */
-
-    xlusup_first = xlusup[first];
-    xlsub_first = xlsub[first];
-    m = xlusup[first + 1] - xlusup_first;
-    n = last - first + 1;
-    m1 = m - 1;
-    inc_diag = m + 1;
-    nzlc = lastc ? (xlusup[last + 2] - xlusup[last + 1]) : 0;
-    temp = dwork - n;
-
-    /* Quick return if nothing to do. */
-    if (m == 0 || m == n || drop_rule == NODROP)
-    {
-	*nnzLj += m * n;
-	return 0;
-    }
-
-    /* basic dropping: ILU(tau) */
-    for (i = n; i <= m1; )
-    {
-	/* the average abs value of ith row */
-	switch (nrm)
-	{
-	    case ONE_NORM:
-		temp[i] = dasum_(&n, &lusup[xlusup_first + i], &m) / (double)n;
-		break;
-	    case TWO_NORM:
-		temp[i] = dnrm2_(&n, &lusup[xlusup_first + i], &m)
-		    / sqrt((double)n);
-		break;
-	    case INF_NORM:
-	    default:
-		k = idamax_(&n, &lusup[xlusup_first + i], &m) - 1;
-		temp[i] = fabs(lusup[xlusup_first + i + m * k]);
-		break;
-	}
-
-	/* drop small entries due to drop_tol */
-	if (drop_rule & DROP_BASIC && temp[i] < drop_tol)
-	{
-	    r++;
-	    /* drop the current row and move the last undropped row here */
-	    if (r > 1) /* add to last row */
-	    {
-		/* accumulate the sum (for MILU) */
-		switch (milu)
-		{
-		    case SMILU_1:
-		    case SMILU_2:
-			daxpy_(&n, &one, &lusup[xlusup_first + i], &m,
-				&lusup[xlusup_first + m - 1], &m);
-			break;
-		    case SMILU_3:
-			for (j = 0; j < n; j++)
-			    lusup[xlusup_first + (m - 1) + j * m] +=
-				    fabs(lusup[xlusup_first + i + j * m]);
-			break;
-		    case SILU:
-		    default:
-			break;
-		}
-		dcopy_(&n, &lusup[xlusup_first + m1], &m,
-                       &lusup[xlusup_first + i], &m);
-	    } /* if (r > 1) */
-	    else /* move to last row */
-	    {
-		dswap_(&n, &lusup[xlusup_first + m1], &m,
-			&lusup[xlusup_first + i], &m);
-		if (milu == SMILU_3)
-		    for (j = 0; j < n; j++) {
-			lusup[xlusup_first + m1 + j * m] =
-				fabs(lusup[xlusup_first + m1 + j * m]);
-                    }
-	    }
-	    lsub[xlsub_first + i] = lsub[xlsub_first + m1];
-	    m1--;
-	    continue;
-	} /* if dropping */
-	else
-	{
-	    if (temp[i] > d_max) d_max = temp[i];
-	    if (temp[i] < d_min) d_min = temp[i];
-	}
-	i++;
-    } /* for */
-
-    /* Secondary dropping: drop more rows according to the quota. */
-    quota = ceil((double)quota / (double)n);
-    if (drop_rule & DROP_SECONDARY && m - r > quota)
-    {
-	register double tol = d_max;
-
-	/* Calculate the second dropping tolerance */
-	if (quota > n)
-	{
-	    if (drop_rule & DROP_INTERP) /* by interpolation */
-	    {
-		d_max = 1.0 / d_max; d_min = 1.0 / d_min;
-		tol = 1.0 / (d_max + (d_min - d_max) * quota / (m - n - r));
-	    }
-	    else /* by quick sort */
-	    {
-		register int *itemp = iwork - n;
-		A = temp;
-		for (i = n; i <= m1; i++) itemp[i] = i;
-		qsort(iwork, m1 - n + 1, sizeof(int), _compare_);
-		printf("new quota = %d, iwork size for initilization = %d\n",
-		       quota, m1-n+1);
-		/* tol = temp[iwork[quota]];  bug! */
-		assert( n <= quota <= m1 );
-		tol = temp[itemp[quota]];
-	    }
-	}
-
-	for (i = n; i <= m1; )
-	{
-	    if (temp[i] <= tol)
-	    {
-		register int j;
-		r++;
-		/* drop the current row and move the last undropped row here */
-		if (r > 1) /* add to last row */
-		{
-		    /* accumulate the sum (for MILU) */
-		    switch (milu)
-		    {
-			case SMILU_1:
-			case SMILU_2:
-			    daxpy_(&n, &one, &lusup[xlusup_first + i], &m,
-				    &lusup[xlusup_first + m - 1], &m);
-			    break;
-			case SMILU_3:
-			    for (j = 0; j < n; j++)
-				lusup[xlusup_first + (m - 1) + j * m] +=
-					fabs(lusup[xlusup_first + i + j * m]);
-			    break;
-			case SILU:
-			default:
-			    break;
-		    }
-		    dcopy_(&n, &lusup[xlusup_first + m1], &m,
-			    &lusup[xlusup_first + i], &m);
-		} /* if (r > 1) */
-		else /* move to last row */
-		{
-		    dswap_(&n, &lusup[xlusup_first + m1], &m,
-			    &lusup[xlusup_first + i], &m);
-		    if (milu == SMILU_3)
-			for (j = 0; j < n; j++) {
-			    lusup[xlusup_first + m1 + j * m] =
-				    fabs(lusup[xlusup_first + m1 + j * m]);
-                        }
-		}
-		lsub[xlsub_first + i] = lsub[xlsub_first + m1];
-		m1--;
-		temp[i] = temp[m1];
-
-		continue;
-	    }
-	    i++;
-
-	} /* for */
-
-    } /* if secondary dropping */
-
-    for (i = n; i < m; i++) temp[i] = 0.0;
-
-    if (r == 0)
-    {
-	*nnzLj += m * n;
-	return 0;
-    }
-
-    /* add dropped entries to the diagnal */
-    if (milu != SILU)
-    {
-	register int j;
-	double t;
-	for (j = 0; j < n; j++)
-	{
-	    t = lusup[xlusup_first + (m - 1) + j * m] * MILU_ALPHA;
- 	    switch (milu)
-	    {
-		case SMILU_1:
-		    if (t != none) {
-			lusup[xlusup_first + j * inc_diag] *= (one + t);
-                    }
-		    else
-		    {
-			lusup[xlusup_first + j * inc_diag] *= *fill_tol;
-#ifdef DEBUG
-			printf("[1] ZERO PIVOT: FILL col %d.\n", first + j);
-			fflush(stdout);
-#endif
-			nzp++;
-		    }
-		    break;
-		case SMILU_2:
-		    lusup[xlusup_first + j * inc_diag] *= (1.0 + fabs(t));
-		    break;
-		case SMILU_3:
-		    lusup[xlusup_first + j * inc_diag] *= (one + t);
-		    break;
-		case SILU:
-		default:
-		    break;
-	    }
-	}
-	if (nzp > 0) *fill_tol = -nzp;
-    }
-
-    /* Remove dropped entries from the memory and fix the pointers. */
-    m1 = m - r;
-    for (j = 1; j < n; j++)
-    {
-	register int tmp1, tmp2;
-	tmp1 = xlusup_first + j * m1;
-	tmp2 = xlusup_first + j * m;
-	for (i = 0; i < m1; i++)
-	    lusup[i + tmp1] = lusup[i + tmp2];
-    }
-    for (i = 0; i < nzlc; i++)
-	lusup[xlusup_first + i + n * m1] = lusup[xlusup_first + i + n * m];
-    for (i = 0; i < nzlc; i++)
-	lsub[xlsub[last + 1] - r + i] = lsub[xlsub[last + 1] + i];
-    for (i = first + 1; i <= last + 1; i++)
-    {
-	xlusup[i] -= r * (i - first);
-	xlsub[i] -= r;
-    }
-    if (lastc)
-    {
-	xlusup[last + 2] -= r * n;
-	xlsub[last + 2] -= r;
-    }
-
-    *nnzLj += (m - r) * n;
-    return r;
-}
diff --git a/thirdparty/superlu/SuperLU_4.1/SRC/ilu_scopy_to_ucol.c.bak b/thirdparty/superlu/SuperLU_4.1/SRC/ilu_scopy_to_ucol.c.bak
deleted file mode 100644
index 2b3bc70..0000000
--- a/thirdparty/superlu/SuperLU_4.1/SRC/ilu_scopy_to_ucol.c.bak
+++ /dev/null
@@ -1,199 +0,0 @@
-
-/*! @file ilu_scopy_to_ucol.c
- * \brief Copy a computed column of U to the compressed data structure
- * and drop some small entries
- *
- * <pre>
- * -- SuperLU routine (version 4.0) --
- * Lawrence Berkeley National Laboratory
- * June 30, 2009
- * </pre>
- */
-
-#include "slu_sdefs.h"
-
-#ifdef DEBUG
-int num_drop_U;
-#endif
-
-static float *A;  /* used in _compare_ only */
-static int _compare_(const void *a, const void *b)
-{
-    register int *x = (int *)a, *y = (int *)b;
-    register double xx = fabs(A[*x]), yy = fabs(A[*y]);
-    if (xx > yy) return -1;
-    else if (xx < yy) return 1;
-    else return 0;
-}
-
-
-int
-ilu_scopy_to_ucol(
-	      int	 jcol,	   /* in */
-	      int	 nseg,	   /* in */
-	      int	 *segrep,  /* in */
-	      int	 *repfnz,  /* in */
-	      int	 *perm_r,  /* in */
-	      float	 *dense,   /* modified - reset to zero on return */
-	      int  	 drop_rule,/* in */
-	      milu_t	 milu,	   /* in */
-	      double	 drop_tol, /* in */
-	      int	 quota,    /* maximum nonzero entries allowed */
-	      float	 *sum,	   /* out - the sum of dropped entries */
-	      int	 *nnzUj,   /* in - out */
-	      GlobalLU_t *Glu,	   /* modified */
-	      int	 *work	   /* working space with minimum size n,
-				    * used by the second dropping rule */
-	      )
-{
-/*
- * Gather from SPA dense[*] to global ucol[*].
- */
-    int       ksub, krep, ksupno;
-    int       i, k, kfnz, segsze;
-    int       fsupc, isub, irow;
-    int       jsupno, nextu;
-    int       new_next, mem_error;
-    int       *xsup, *supno;
-    int       *lsub, *xlsub;
-    float    *ucol;
-    int       *usub, *xusub;
-    int       nzumax;
-    int       m; /* number of entries in the nonzero U-segments */
-    register float d_max = 0.0, d_min = 1.0 / dlamch_("Safe minimum");
-    register double tmp;
-    float zero = 0.0;
-
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    ucol    = Glu->ucol;
-    usub    = Glu->usub;
-    xusub   = Glu->xusub;
-    nzumax  = Glu->nzumax;
-
-    *sum = zero;
-    if (drop_rule == NODROP) {
-	drop_tol = -1.0, quota = Glu->n;
-    }
-
-    jsupno = supno[jcol];
-    nextu  = xusub[jcol];
-    k = nseg - 1;
-    for (ksub = 0; ksub < nseg; ksub++) {
-	krep = segrep[k--];
-	ksupno = supno[krep];
-
-	if ( ksupno != jsupno ) { /* Should go into ucol[] */
-	    kfnz = repfnz[krep];
-	    if ( kfnz != EMPTY ) {	/* Nonzero U-segment */
-
-		fsupc = xsup[ksupno];
-		isub = xlsub[fsupc] + kfnz - fsupc;
-		segsze = krep - kfnz + 1;
-
-		new_next = nextu + segsze;
-		while ( new_next > nzumax ) {
-		    if ((mem_error = sLUMemXpand(jcol, nextu, UCOL, &nzumax,
-			    Glu)) != 0)
-			return (mem_error);
-		    ucol = Glu->ucol;
-		    if ((mem_error = sLUMemXpand(jcol, nextu, USUB, &nzumax,
-			    Glu)) != 0)
-			return (mem_error);
-		    usub = Glu->usub;
-		    lsub = Glu->lsub;
-		}
-
-		for (i = 0; i < segsze; i++) {
-		    irow = lsub[isub++];
-		    tmp = fabs(dense[irow]);
-
-		    /* first dropping rule */
-		    if (quota > 0 && tmp >= drop_tol) {
-			if (tmp > d_max) d_max = tmp;
-			if (tmp < d_min) d_min = tmp;
-			usub[nextu] = perm_r[irow];
-			ucol[nextu] = dense[irow];
-			nextu++;
-		    } else {
-			switch (milu) {
-			    case SMILU_1:
-			    case SMILU_2:
-				*sum += dense[irow];
-				break;
-			    case SMILU_3:
-				/* *sum += fabs(dense[irow]);*/
-				*sum += tmp;
-				break;
-			    case SILU:
-			    default:
-				break;
-			}
-#ifdef DEBUG
-			num_drop_U++;
-#endif
-		    }
-		    dense[irow] = zero;
-		}
-
-	    }
-
-	}
-
-    } /* for each segment... */
-
-    xusub[jcol + 1] = nextu;	  /* Close U[*,jcol] */
-    m = xusub[jcol + 1] - xusub[jcol];
-
-    /* second dropping rule */
-    if (drop_rule & DROP_SECONDARY && m > quota) {
-	register double tol = d_max;
-	register int m0 = xusub[jcol] + m - 1;
-
-	if (quota > 0) {
-	    if (drop_rule & DROP_INTERP) {
-		d_max = 1.0 / d_max; d_min = 1.0 / d_min;
-		tol = 1.0 / (d_max + (d_min - d_max) * quota / m);
-	    } else {
-		A = &ucol[xusub[jcol]];
-		for (i = 0; i < m; i++) work[i] = i;
-		qsort(work, m, sizeof(int), _compare_);
-		tol = fabs(usub[xusub[jcol] + work[quota]]);
-	    }
-	}
-	for (i = xusub[jcol]; i <= m0; ) {
-	    if (fabs(ucol[i]) <= tol) {
-		switch (milu) {
-		    case SMILU_1:
-		    case SMILU_2:
-			*sum += ucol[i];
-			break;
-		    case SMILU_3:
-			*sum += fabs(ucol[i]);
-			break;
-		    case SILU:
-		    default:
-			break;
-		}
-		ucol[i] = ucol[m0];
-		usub[i] = usub[m0];
-		m0--;
-		m--;
-#ifdef DEBUG
-		num_drop_U++;
-#endif
-		xusub[jcol + 1]--;
-		continue;
-	    }
-	    i++;
-	}
-    }
-
-    if (milu == SMILU_2) *sum = fabs(*sum);
-
-    *nnzUj += m;
-
-    return 0;
-}
diff --git a/thirdparty/superlu/SuperLU_4.1/SRC/ilu_zcopy_to_ucol.c.bak b/thirdparty/superlu/SuperLU_4.1/SRC/ilu_zcopy_to_ucol.c.bak
deleted file mode 100644
index 9859c2f..0000000
--- a/thirdparty/superlu/SuperLU_4.1/SRC/ilu_zcopy_to_ucol.c.bak
+++ /dev/null
@@ -1,202 +0,0 @@
-
-/*! @file ilu_zcopy_to_ucol.c
- * \brief Copy a computed column of U to the compressed data structure
- * and drop some small entries
- *
- * <pre>
- * -- SuperLU routine (version 4.0) --
- * Lawrence Berkeley National Laboratory
- * June 30, 2009
- * </pre>
- */
-
-#include "slu_zdefs.h"
-
-#ifdef DEBUG
-int num_drop_U;
-#endif
-
-static doublecomplex *A;  /* used in _compare_ only */
-static int _compare_(const void *a, const void *b)
-{
-    register int *x = (int *)a, *y = (int *)b;
-    register double xx = z_abs1(&A[*x]), yy = z_abs1(&A[*y]);
-    if (xx > yy) return -1;
-    else if (xx < yy) return 1;
-    else return 0;
-}
-
-
-int
-ilu_zcopy_to_ucol(
-	      int	 jcol,	   /* in */
-	      int	 nseg,	   /* in */
-	      int	 *segrep,  /* in */
-	      int	 *repfnz,  /* in */
-	      int	 *perm_r,  /* in */
-	      doublecomplex	 *dense,   /* modified - reset to zero on return */
-	      int  	 drop_rule,/* in */
-	      milu_t	 milu,	   /* in */
-	      double	 drop_tol, /* in */
-	      int	 quota,    /* maximum nonzero entries allowed */
-	      doublecomplex	 *sum,	   /* out - the sum of dropped entries */
-	      int	 *nnzUj,   /* in - out */
-	      GlobalLU_t *Glu,	   /* modified */
-	      int	 *work	   /* working space with minimum size n,
-				    * used by the second dropping rule */
-	      )
-{
-/*
- * Gather from SPA dense[*] to global ucol[*].
- */
-    int       ksub, krep, ksupno;
-    int       i, k, kfnz, segsze;
-    int       fsupc, isub, irow;
-    int       jsupno, nextu;
-    int       new_next, mem_error;
-    int       *xsup, *supno;
-    int       *lsub, *xlsub;
-    doublecomplex    *ucol;
-    int       *usub, *xusub;
-    int       nzumax;
-    int       m; /* number of entries in the nonzero U-segments */
-    register double d_max = 0.0, d_min = 1.0 / dlamch_("Safe minimum");
-    register double tmp;
-    doublecomplex zero = {0.0, 0.0};
-
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    ucol    = Glu->ucol;
-    usub    = Glu->usub;
-    xusub   = Glu->xusub;
-    nzumax  = Glu->nzumax;
-
-    *sum = zero;
-    if (drop_rule == NODROP) {
-	drop_tol = -1.0, quota = Glu->n;
-    }
-
-    jsupno = supno[jcol];
-    nextu  = xusub[jcol];
-    k = nseg - 1;
-    for (ksub = 0; ksub < nseg; ksub++) {
-	krep = segrep[k--];
-	ksupno = supno[krep];
-
-	if ( ksupno != jsupno ) { /* Should go into ucol[] */
-	    kfnz = repfnz[krep];
-	    if ( kfnz != EMPTY ) {	/* Nonzero U-segment */
-
-		fsupc = xsup[ksupno];
-		isub = xlsub[fsupc] + kfnz - fsupc;
-		segsze = krep - kfnz + 1;
-
-		new_next = nextu + segsze;
-		while ( new_next > nzumax ) {
-		    if ((mem_error = zLUMemXpand(jcol, nextu, UCOL, &nzumax,
-			    Glu)) != 0)
-			return (mem_error);
-		    ucol = Glu->ucol;
-		    if ((mem_error = zLUMemXpand(jcol, nextu, USUB, &nzumax,
-			    Glu)) != 0)
-			return (mem_error);
-		    usub = Glu->usub;
-		    lsub = Glu->lsub;
-		}
-
-		for (i = 0; i < segsze; i++) {
-		    irow = lsub[isub++];
-         	    tmp = z_abs1(&dense[irow]);
-
-		    /* first dropping rule */
-		    if (quota > 0 && tmp >= drop_tol) {
-			if (tmp > d_max) d_max = tmp;
-			if (tmp < d_min) d_min = tmp;
-			usub[nextu] = perm_r[irow];
-			ucol[nextu] = dense[irow];
-			nextu++;
-		    } else {
-			switch (milu) {
-			    case SMILU_1:
-			    case SMILU_2:
-                                z_add(sum, sum, &dense[irow]);
-				break;
-			    case SMILU_3:
-				/* *sum += fabs(dense[irow]);*/
-				sum->r += tmp;
-				break;
-			    case SILU:
-			    default:
-				break;
-			}
-#ifdef DEBUG
-			num_drop_U++;
-#endif
-		    }
-		    dense[irow] = zero;
-		}
-
-	    }
-
-	}
-
-    } /* for each segment... */
-
-    xusub[jcol + 1] = nextu;	  /* Close U[*,jcol] */
-    m = xusub[jcol + 1] - xusub[jcol];
-
-    /* second dropping rule */
-    if (drop_rule & DROP_SECONDARY && m > quota) {
-	register double tol = d_max;
-	register int m0 = xusub[jcol] + m - 1;
-
-	if (quota > 0) {
-	    if (drop_rule & DROP_INTERP) {
-		d_max = 1.0 / d_max; d_min = 1.0 / d_min;
-		tol = 1.0 / (d_max + (d_min - d_max) * quota / m);
-	    } else {
-		A = &ucol[xusub[jcol]];
-		for (i = 0; i < m; i++) work[i] = i;
-		qsort(work, m, sizeof(int), _compare_);
-		tol = fabs(usub[xusub[jcol] + work[quota]]);
-	    }
-	}
-	for (i = xusub[jcol]; i <= m0; ) {
-	    if (z_abs1(&ucol[i]) <= tol) {
-		switch (milu) {
-		    case SMILU_1:
-		    case SMILU_2:
-			z_add(sum, sum, &ucol[i]);
-			break;
-		    case SMILU_3:
-			sum->r += tmp;
-			break;
-		    case SILU:
-		    default:
-			break;
-		}
-		ucol[i] = ucol[m0];
-		usub[i] = usub[m0];
-		m0--;
-		m--;
-#ifdef DEBUG
-		num_drop_U++;
-#endif
-		xusub[jcol + 1]--;
-		continue;
-	    }
-	    i++;
-	}
-    }
-
-    if (milu == SMILU_2) {
-        sum->r = z_abs1(sum); sum->i = 0.0;
-    }
-    if (milu == SMILU_3) sum->i = 0.0;
-
-    *nnzUj += m;
-
-    return 0;
-}
diff --git a/thirdparty/superlu/SuperLU_4.1/SRC/mc64ad.f.bak b/thirdparty/superlu/SuperLU_4.1/SRC/mc64ad.f.bak
deleted file mode 100644
index c68c4d7..0000000
--- a/thirdparty/superlu/SuperLU_4.1/SRC/mc64ad.f.bak
+++ /dev/null
@@ -1,2002 +0,0 @@
-CCCCC COPYRIGHT (c) 1999  Council for the Central Laboratory of the
-CCCCC Research Councils.    All rights reserved.
-CCCCC PACKAGE MC64A/AD
-CCCCC AUTHORS Iain Duff (i.duff@rl.ac.uk) and Jacko Koster (jak@ii.uib.no)
-CCCCC LAST UPDATE 20/09/99
-CCCCC
-C *** Conditions on external use ***
-C
-C The user shall acknowledge the contribution of this
-C package in any publication of material dependent upon the use of
-C the package. The user shall use reasonable endeavours to notify
-C the authors of the package of this publication.
-C
-C The user can modify this code but, at no time
-C shall the right or title to all or any part of this package pass
-C to the user. The user shall make available free of charge
-C to the authors for any purpose all information relating to any
-C alteration or addition made to this package for the purposes of
-C extending the capabilities or enhancing the performance of this
-C package.
-C
-C The user shall not pass this code directly to a third party without the
-C express prior consent of the authors.  Users wanting to licence their
-C own copy of these routines should send email to hsl@aeat.co.uk
-C
-C None of the comments from the Copyright notice up to and including this
-C one shall be removed or altered in any way.
-
-C**********************************************************************
-      SUBROUTINE MC64ID(ICNTL)
-      IMPLICIT NONE
-C
-C *** Copyright (c) 1999  Council for the Central Laboratory of the
-C     Research Councils                                             ***
-C *** Although every effort has been made to ensure robustness and  ***
-C *** reliability of the subroutines in this MC64 suite, we         ***
-C *** disclaim any liability arising through the use or misuse of   ***
-C *** any of the subroutines.                                       ***
-C *** Any problems?   Contact ...
-C     Iain Duff (I.Duff@rl.ac.uk) or Jacko Koster (jak@ii.uib.no)   ***
-C
-C  Purpose
-C  =======
-C
-C  The components of the array ICNTL control the action of MC64A/AD.
-C  Default values for these are set in this subroutine.
-C
-C  Parameters
-C  ==========
-C
-      INTEGER ICNTL(10)
-C
-C  Local variables
-      INTEGER I
-C
-C    ICNTL(1) has default value 6.
-C     It is the output stream for error messages. If it
-C     is negative, these messages will be suppressed.
-C
-C    ICNTL(2) has default value 6.
-C     It is the output stream for warning messages.
-C     If it is negative, these messages are suppressed.
-C
-C    ICNTL(3) has default value -1.
-C     It is the output stream for monitoring printing.
-C     If it is negative, these messages are suppressed.
-C
-C    ICNTL(4) has default value 0.
-C     If left at the defaut value, the incoming data is checked for
-C     out-of-range indices and duplicates.  Setting ICNTL(4) to any
-C     other will avoid the checks but is likely to cause problems
-C     later if out-of-range indices or duplicates are present.
-C     The user should only set ICNTL(4) non-zero, if the data is
-C     known to avoid these problems.
-C
-C    ICNTL(5) to ICNTL(10) are not used by MC64A/AD but are set to
-C     zero in this routine.
-
-C Initialization of the ICNTL array.
-      ICNTL(1) = 6
-      ICNTL(2) = 6
-      ICNTL(3) = -1
-      DO 10 I = 4,10
-        ICNTL(I) = 0
-   10 CONTINUE
-
-      RETURN
-      END
-
-C**********************************************************************
-      SUBROUTINE MC64AD(JOB,N,NE,IP,IRN,A,NUM,CPERM,LIW,IW,LDW,DW,
-     &           ICNTL,INFO)
-      IMPLICIT NONE
-C
-C *** Copyright (c) 1999  Council for the Central Laboratory of the
-C     Research Councils                                             ***
-C *** Although every effort has been made to ensure robustness and  ***
-C *** reliability of the subroutines in this MC64 suite, we         ***
-C *** disclaim any liability arising through the use or misuse of   ***
-C *** any of the subroutines.                                       ***
-C *** Any problems?   Contact ...
-C     Iain Duff (I.Duff@rl.ac.uk) or Jacko Koster (jak@ii.uib.no)   ***
-C
-C  Purpose
-C  =======
-C
-C This subroutine attempts to find a column permutation for an NxN 
-C sparse matrix A = {a_ij} that makes the permuted matrix have N 
-C entries on its diagonal.
-C If the matrix is structurally nonsingular, the subroutine optionally
-C returns a column permutation that maximizes the smallest element 
-C on the diagonal, maximizes the sum of the diagonal entries, or 
-C maximizes the product of the diagonal entries of the permuted matrix.
-C For the latter option, the subroutine also finds scaling factors 
-C that may be used to scale the matrix so that the nonzero diagonal 
-C entries of the permuted matrix are one in absolute value and all the 
-C off-diagonal entries are less than or equal to one in absolute value.
-C The natural logarithms of the scaling factors u(i), i=1..N, for the 
-C rows and v(j), j=1..N, for the columns are returned so that the 
-C scaled matrix B = {b_ij} has entries b_ij = a_ij * EXP(u_i + v_j).
-C
-C  Parameters
-C  ==========
-C
-      INTEGER JOB,N,NE,NUM,LIW,LDW
-      INTEGER IP(N+1),IRN(NE),CPERM(N),IW(LIW),ICNTL(10),INFO(10)
-      DOUBLE PRECISION A(NE),DW(LDW)
-C
-C JOB is an INTEGER variable which must be set by the user to
-C control the action. It is not altered by the subroutine.
-C Possible values for JOB are:
-C   1 Compute a column permutation of the matrix so that the
-C     permuted matrix has as many entries on its diagonal as possible. 
-C     The values on the diagonal are of arbitrary size. HSL subroutine 
-C     MC21A/AD is used for this. See [1].
-C   2 Compute a column permutation of the matrix so that the smallest 
-C     value on the diagonal of the permuted matrix is maximized.
-C     See [3].
-C   3 Compute a column permutation of the matrix so that the smallest
-C     value on the diagonal of the permuted matrix is maximized.
-C     The algorithm differs from the one used for JOB = 2 and may
-C     have quite a different performance. See [2].
-C   4 Compute a column permutation of the matrix so that the sum
-C     of the diagonal entries of the permuted matrix is maximized.
-C     See [3].
-C   5 Compute a column permutation of the matrix so that the product
-C     of the diagonal entries of the permuted matrix is maximized
-C     and vectors to scale the matrix so that the nonzero diagonal 
-C     entries of the permuted matrix are one in absolute value and 
-C     all the off-diagonal entries are less than or equal to one in 
-C     absolute value. See [3].
-C  Restriction: 1 <= JOB <= 5.
-C 
-C N is an INTEGER variable which must be set by the user to the
-C   order of the matrix A. It is not altered by the subroutine.
-C   Restriction: N >= 1.
-C
-C NE is an INTEGER variable which must be set by the user to the
-C   number of entries in the matrix. It is not altered by the 
-C   subroutine.
-C   Restriction: NE >= 1.
-C
-C IP is an INTEGER array of length N+1.
-C   IP(J), J=1..N, must be set by the user to the position in array IRN 
-C   of the first row index of an entry in column J. IP(N+1) must be set
-C   to NE+1. It is not altered by the subroutine.
-C
-C IRN is an INTEGER array of length NE. 
-C   IRN(K), K=1..NE, must be set by the user to hold the row indices of
-C   the entries of the matrix. Those belonging to column J must be 
-C   stored contiguously in the positions IP(J)..IP(J+1)-1. The ordering
-C   of the row indices within each column is unimportant. Repeated 
-C   entries are not allowed. The array IRN is not altered by the 
-C   subroutine.
-C
-C A is a REAL (DOUBLE PRECISION in the D-version) array of length NE. 
-C   The user must set A(K), K=1..NE, to the numerical value of the 
-C   entry that corresponds to IRN(K). 
-C   It is not used by the subroutine when JOB = 1. 
-C   It is not altered by the subroutine.
-C
-C NUM is an INTEGER variable that need not be set by the user.
-C   On successful exit, NUM will be the number of entries on the 
-C   diagonal of the permuted matrix.
-C   If NUM < N, the matrix is structurally singular.
-C
-C CPERM is an INTEGER array of length N that need not be set by the 
-C   user. On successful exit, CPERM contains the column permutation.
-C   Column CPERM(J) of the original matrix is column J in the permuted 
-C   matrix, J=1..N.
-C
-C LIW is an INTEGER variable that must be set by the user to
-C   the dimension of array IW. It is not altered by the subroutine.
-C   Restriction:
-C     JOB = 1 :  LIW >= 5N
-C     JOB = 2 :  LIW >= 4N
-C     JOB = 3 :  LIW >= 10N + NE
-C     JOB = 4 :  LIW >= 5N
-C     JOB = 5 :  LIW >= 5N
-C 
-C IW is an INTEGER array of length LIW that is used for workspace.
-C 
-C LDW is an INTEGER variable that must be set by the user to the
-C   dimension of array DW. It is not altered by the subroutine.
-C   Restriction:
-C     JOB = 1 :  LDW is not used
-C     JOB = 2 :  LDW >= N
-C     JOB = 3 :  LDW >= NE
-C     JOB = 4 :  LDW >= 2N + NE
-C     JOB = 5 :  LDW >= 3N + NE
-C 
-C DW is a REAL (DOUBLE PRECISION in the D-version) array of length LDW 
-C   that is used for workspace. If JOB = 5, on return,
-C   DW(i) contains u_i, i=1..N, and DW(N+j) contains v_j, j=1..N.
-C 
-C ICNTL is an INTEGER array of length 10. Its components control the 
-C   output of MC64A/AD and must be set by the user before calling
-C   MC64A/AD. They are not altered by the subroutine.
-C
-C   ICNTL(1) must be set to specify the output stream for
-C   error messages. If ICNTL(1) < 0, messages are suppressed.
-C   The default value set by MC46I/ID is 6.
-C
-C   ICNTL(2) must be set by the user to specify the output stream for
-C   warning messages. If ICNTL(2) < 0, messages are suppressed.
-C   The default value set by MC46I/ID is 6.
-C
-C   ICNTL(3) must be set by the user to specify the output stream for
-C   diagnostic messages. If ICNTL(3) < 0, messages are suppressed.
-C   The default value set by MC46I/ID is -1.
-C
-C   ICNTL(4) must be set by the user to a value other than 0 to avoid
-C   checking of the input data.
-C   The default value set by MC46I/ID is 0.
-C    
-C INFO is an INTEGER array of length 10 which need not be set by the 
-C   user. INFO(1) is set non-negative to indicate success. A negative 
-C   value is returned if an error occurred, a positive value if a 
-C   warning occurred. INFO(2) holds further information on the error.
-C   On exit from the subroutine, INFO(1) will take one of the 
-C   following values:
-C    0 : successful entry (for structurally nonsingular matrix).
-C   +1 : successful entry (for structurally singular matrix).
-C   +2 : the returned scaling factors are large and may cause
-C        overflow when used to scale the matrix. 
-C        (For JOB = 5 entry only.)
-C   -1 : JOB < 1 or JOB > 5.  Value of JOB held in INFO(2).
-C   -2 : N < 1.  Value of N held in INFO(2).
-C   -3 : NE < 1. Value of NE held in INFO(2).
-C   -4 : the defined length LIW violates the restriction on LIW.
-C        Value of LIW required given by INFO(2).
-C   -5 : the defined length LDW violates the restriction on LDW.
-C        Value of LDW required given by INFO(2).
-C   -6 : entries are found whose row indices are out of range. INFO(2)
-C        contains the index of a column in which such an entry is found.
-C   -7 : repeated entries are found. INFO(2) contains the index of a 
-C        column in which such entries are found.
-C  INFO(3) to INFO(10) are not currently used and are set to zero by 
-C        the routine.
-C
-C References:
-C  [1]  I. S. Duff, (1981),
-C       "Algorithm 575. Permutations for a zero-free diagonal",
-C       ACM Trans. Math. Software 7(3), 387-390.
-C  [2]  I. S. Duff and J. Koster, (1998),
-C       "The design and use of algorithms for permuting large
-C       entries to the diagonal of sparse matrices",
-C       SIAM J. Matrix Anal. Appl., vol. 20, no. 4, pp. 889-901.
-C  [3]  I. S. Duff and J. Koster, (1999),
-C       "On algorithms for permuting large entries to the diagonal 
-C       of sparse matrices",
-C       Technical Report RAL-TR-1999-030, RAL, Oxfordshire, England.
-
-C Local variables and parameters
-      INTEGER I,J,K
-      DOUBLE PRECISION FACT,ZERO,RINF
-      PARAMETER (ZERO=0.0D+00)
-C External routines and functions
-c     EXTERNAL FD05AD
-c     DOUBLE PRECISION FD05AD
-      EXTERNAL MC21AD,MC64BD,MC64RD,MC64SD,MC64WD, DLAMCH
-      DOUBLE PRECISION DLAMCH
-C Intrinsic functions
-      INTRINSIC ABS,LOG
-
-C Set RINF to largest positive real number (infinity)
-c XSL    RINF = FD05AD(5)
-      RINF = DLAMCH('Overflow')
-
-C Check value of JOB
-      IF (JOB.LT.1 .OR. JOB.GT.5) THEN
-        INFO(1) = -1
-        INFO(2) = JOB
-        IF (ICNTL(1).GE.0) WRITE(ICNTL(1),9001) INFO(1),'JOB',JOB
-        GO TO 99
-      ENDIF
-C Check value of N
-      IF (N.LT.1) THEN
-        INFO(1) = -2
-        INFO(2) = N
-        IF (ICNTL(1).GE.0) WRITE(ICNTL(1),9001) INFO(1),'N',N
-        GO TO 99
-      ENDIF
-C Check value of NE
-      IF (NE.LT.1) THEN
-        INFO(1) = -3
-        INFO(2) = NE
-        IF (ICNTL(1).GE.0) WRITE(ICNTL(1),9001) INFO(1),'NE',NE
-        GO TO 99
-      ENDIF
-C Check LIW
-      IF (JOB.EQ.1) K = 5*N
-      IF (JOB.EQ.2) K = 4*N
-      IF (JOB.EQ.3) K = 10*N + NE
-      IF (JOB.EQ.4) K = 5*N
-      IF (JOB.EQ.5) K = 5*N
-      IF (LIW.LT.K) THEN
-        INFO(1) = -4
-        INFO(2) = K
-        IF (ICNTL(1).GE.0) WRITE(ICNTL(1),9004) INFO(1),K
-        GO TO 99
-      ENDIF
-C Check LDW
-C If JOB = 1, do not check
-      IF (JOB.GT.1) THEN
-        IF (JOB.EQ.2) K = N
-        IF (JOB.EQ.3) K = NE
-        IF (JOB.EQ.4) K = 2*N + NE
-        IF (JOB.EQ.5) K = 3*N + NE
-        IF (LDW.LT.K) THEN
-          INFO(1) = -5
-          INFO(2) = K
-          IF (ICNTL(1).GE.0) WRITE(ICNTL(1),9005) INFO(1),K
-          GO TO 99
-        ENDIF
-      ENDIF
-      IF (ICNTL(4).EQ.0) THEN
-C Check row indices. Use IW(1:N) as workspace
-        DO 3 I = 1,N
-          IW(I) = 0
-    3   CONTINUE
-        DO 6 J = 1,N
-          DO 4 K = IP(J),IP(J+1)-1
-            I = IRN(K)
-C Check for row indices that are out of range
-            IF (I.LT.1 .OR. I.GT.N) THEN
-              INFO(1) = -6
-              INFO(2) = J
-              IF (ICNTL(1).GE.0) WRITE(ICNTL(1),9006) INFO(1),J,I
-              GO TO 99
-            ENDIF
-C Check for repeated row indices within a column
-            IF (IW(I).EQ.J) THEN
-              INFO(1) = -7
-              INFO(2) = J
-              IF (ICNTL(1).GE.0) WRITE(ICNTL(1),9007) INFO(1),J,I 
-              GO TO 99
-            ELSE
-              IW(I) = J
-            ENDIF
-    4     CONTINUE
-    6   CONTINUE
-      ENDIF
-
-C Print diagnostics on input
-      IF (ICNTL(3).GE.0) THEN
-        WRITE(ICNTL(3),9020) JOB,N,NE
-        WRITE(ICNTL(3),9021) (IP(J),J=1,N+1)
-        WRITE(ICNTL(3),9022) (IRN(J),J=1,NE)
-        IF (JOB.GT.1) WRITE(ICNTL(3),9023) (A(J),J=1,NE)
-      ENDIF
-
-C Set components of INFO to zero
-      DO 8 I=1,10
-        INFO(I) = 0
-    8 CONTINUE
-
-C Compute maximum matching with MC21A/AD
-      IF (JOB.EQ.1) THEN
-C Put length of column J in IW(J)
-        DO 10 J = 1,N
-          IW(J) = IP(J+1) - IP(J)
-   10   CONTINUE
-C IW(N+1:5N) is workspace
-        CALL MC21AD(N,IRN,NE,IP,IW(1),CPERM,NUM,IW(N+1))
-        GO TO 90
-      ENDIF
-
-C Compute bottleneck matching
-      IF (JOB.EQ.2) THEN
-C IW(1:5N), DW(1:N) are workspaces
-        CALL MC64BD(N,NE,IP,IRN,A,CPERM,NUM,
-     &     IW(1),IW(N+1),IW(2*N+1),IW(3*N+1),DW)
-        GO TO 90
-      ENDIF
-
-C Compute bottleneck matching
-      IF (JOB.EQ.3) THEN
-C Copy IRN(K) into IW(K), ABS(A(K)) into DW(K), K=1..NE
-        DO 20 K = 1,NE
-          IW(K) = IRN(K)
-          DW(K) = ABS(A(K))
-   20   CONTINUE
-C Sort entries in each column by decreasing value. 
-        CALL MC64RD(N,NE,IP,IW,DW)
-C IW(NE+1:NE+10N) is workspace
-        CALL MC64SD(N,NE,IP,IW(1),DW,CPERM,NUM,IW(NE+1),
-     &     IW(NE+N+1),IW(NE+2*N+1),IW(NE+3*N+1),IW(NE+4*N+1),
-     &     IW(NE+5*N+1),IW(NE+6*N+1))
-        GO TO 90
-      ENDIF
-
-      IF (JOB.EQ.4) THEN
-        DO 50 J = 1,N
-          FACT = ZERO
-          DO 30 K = IP(J),IP(J+1)-1
-            IF (ABS(A(K)).GT.FACT) FACT = ABS(A(K))
-   30     CONTINUE
-          DO 40 K = IP(J),IP(J+1)-1
-            DW(2*N+K) = FACT - ABS(A(K))
-   40     CONTINUE
-   50   CONTINUE
-C B = DW(2N+1:2N+NE); IW(1:5N) and DW(1:2N) are workspaces
-        CALL MC64WD(N,NE,IP,IRN,DW(2*N+1),CPERM,NUM,
-     &     IW(1),IW(N+1),IW(2*N+1),IW(3*N+1),IW(4*N+1),
-     &     DW(1),DW(N+1))
-        GO TO 90
-      ENDIF
-
-      IF (JOB.EQ.5) THEN
-        DO 75 J = 1,N
-          FACT = ZERO
-          DO 60 K = IP(J),IP(J+1)-1
-            DW(3*N+K) = ABS(A(K))
-            IF (DW(3*N+K).GT.FACT) FACT = DW(3*N+K)
-   60     CONTINUE
-          DW(2*N+J) = FACT
-          IF (FACT.NE.ZERO) THEN
-            FACT = LOG(FACT)
-          ELSE
-            FACT = RINF/N
-          ENDIF
-          DO 70 K = IP(J),IP(J+1)-1
-            IF (DW(3*N+K).NE.ZERO) THEN
-              DW(3*N+K) = FACT - LOG(DW(3*N+K))
-            ELSE
-              DW(3*N+K) = RINF/N
-            ENDIF
-   70     CONTINUE
-   75   CONTINUE
-C B = DW(3N+1:3N+NE); IW(1:5N) and DW(1:2N) are workspaces
-        CALL MC64WD(N,NE,IP,IRN,DW(3*N+1),CPERM,NUM,
-     &     IW(1),IW(N+1),IW(2*N+1),IW(3*N+1),IW(4*N+1),
-     &     DW(1),DW(N+1))
-        IF (NUM.EQ.N) THEN
-          DO 80 J = 1,N
-            IF (DW(2*N+J).NE.ZERO) THEN
-              DW(N+J) = DW(N+J) - LOG(DW(2*N+J))
-            ELSE
-              DW(N+J) = ZERO
-            ENDIF
-   80     CONTINUE
-        ENDIF
-C Check size of scaling factors
-        FACT = 0.5*LOG(RINF)
-        DO 86 J = 1,N
-          IF (DW(J).LT.FACT .AND. DW(N+J).LT.FACT) GO TO 86
-          INFO(1) = 2
-          GO TO 90
-   86   CONTINUE 
-C       GO TO 90
-      ENDIF
-
-   90 IF (INFO(1).EQ.0 .AND. NUM.LT.N) THEN
-C Matrix is structurally singular, return with warning
-        INFO(1) = 1
-        IF (ICNTL(2).GE.0) WRITE(ICNTL(2),9011) INFO(1)
-      ENDIF
-      IF (INFO(1).EQ.2) THEN
-C Scaling factors are large, return with warning
-        IF (ICNTL(2).GE.0) WRITE(ICNTL(2),9012) INFO(1)
-      ENDIF
-
-C Print diagnostics on output
-      IF (ICNTL(3).GE.0) THEN
-        WRITE(ICNTL(3),9030) (INFO(J),J=1,2)
-        WRITE(ICNTL(3),9031) NUM
-        WRITE(ICNTL(3),9032) (CPERM(J),J=1,N)
-        IF (JOB.EQ.5) THEN
-          WRITE(ICNTL(3),9033) (DW(J),J=1,N)
-          WRITE(ICNTL(3),9034) (DW(N+J),J=1,N)
-        ENDIF
-      ENDIF
-
-C Return from subroutine.
-   99 RETURN
-
- 9001 FORMAT (' ****** Error in MC64A/AD. INFO(1) = ',I2,
-     &        ' because ',(A),' = ',I10)
- 9004 FORMAT (' ****** Error in MC64A/AD. INFO(1) = ',I2/
-     &        '        LIW too small, must be at least ',I8)
- 9005 FORMAT (' ****** Error in MC64A/AD. INFO(1) = ',I2/
-     &        '        LDW too small, must be at least ',I8)
- 9006 FORMAT (' ****** Error in MC64A/AD. INFO(1) = ',I2/
-     &        '        Column ',I8,
-     &        ' contains an entry with invalid row index ',I8)
- 9007 FORMAT (' ****** Error in MC64A/AD. INFO(1) = ',I2/
-     &        '        Column ',I8,
-     &        ' contains two or more entries with row index ',I8)
- 9011 FORMAT (' ****** Warning from MC64A/AD. INFO(1) = ',I2/
-     &        '        The matrix is structurally singular.')
- 9012 FORMAT (' ****** Warning from MC64A/AD. INFO(1) = ',I2/
-     &        '        Some scaling factors may be too large.')
- 9020 FORMAT (' ****** Input parameters for MC64A/AD:'/
-     &        ' JOB = ',I8/' N   = ',I8/' NE  = ',I8)
- 9021 FORMAT (' IP(1:N+1)  = ',8I8/(14X,8I8))
- 9022 FORMAT (' IRN(1:NE)  = ',8I8/(14X,8I8))
- 9023 FORMAT (' A(1:NE)    = ',4(1PD14.4)/(14X,4(1PD14.4)))
- 9030 FORMAT (' ****** Output parameters for MC64A/AD:'/
-     &        ' INFO(1:2)  = ',2I8)
- 9031 FORMAT (' NUM        = ',I8)
- 9032 FORMAT (' CPERM(1:N) = ',8I8/(14X,8I8))
- 9033 FORMAT (' DW(1:N)    = ',5(F11.3)/(14X,5(F11.3)))
- 9034 FORMAT (' DW(N+1:2N) = ',5(F11.3)/(14X,5(F11.3)))
-      END
-
-C**********************************************************************
-      SUBROUTINE MC64BD(N,NE,IP,IRN,A,IPERM,NUM,JPERM,PR,Q,L,D) 
-      IMPLICIT NONE
-C
-C *** Copyright (c) 1999  Council for the Central Laboratory of the
-C     Research Councils                                             ***
-C *** Although every effort has been made to ensure robustness and  ***
-C *** reliability of the subroutines in this MC64 suite, we         ***
-C *** disclaim any liability arising through the use or misuse of   ***
-C *** any of the subroutines.                                       ***
-C *** Any problems?   Contact ...
-C     Iain Duff (I.Duff@rl.ac.uk) or Jacko Koster (jak@ii.uib.no)   ***
-C
-      INTEGER N,NE,NUM
-      INTEGER IP(N+1),IRN(NE),IPERM(N),JPERM(N),PR(N),Q(N),L(N)
-      DOUBLE PRECISION A(NE),D(N)
-
-C N, NE, IP, IRN are described in MC64A/AD.
-C A is a REAL (DOUBLE PRECISION in the D-version) array of length
-C   NE. A(K), K=1..NE, must be set to the value of the entry
-C   that corresponds to IRN(K). It is not altered.
-C IPERM is an INTEGER array of length N. On exit, it contains the 
-C    matching: IPERM(I) = 0 or row I is matched to column IPERM(I).
-C NUM is INTEGER variable. On exit, it contains the cardinality of the
-C    matching stored in IPERM.
-C IW is an INTEGER work array of length 4N.
-C DW is a REAL (DOUBLE PRECISION in D-version) work array of length N.
-
-C Local variables
-      INTEGER I,II,J,JJ,JORD,Q0,QLEN,IDUM,JDUM,ISP,JSP,
-     &        K,KK,KK1,KK2,I0,UP,LOW
-      DOUBLE PRECISION CSP,DI,DNEW,DQ0,AI,A0,BV
-C Local parameters 
-      DOUBLE PRECISION RINF,ZERO,MINONE
-      PARAMETER (ZERO=0.0D+0,MINONE=-1.0D+0)
-C Intrinsic functions
-      INTRINSIC ABS,MIN
-C External subroutines and/or functions
-c      EXTERNAL FD05AD,MC64DD,MC64ED,MC64FD, DLAMCH
-c      DOUBLE PRECISION FD05AD, DLAMCH
-      EXTERNAL MC64DD,MC64ED,MC64FD, DLAMCH
-      DOUBLE PRECISION DLAMCH
-
-C Set RINF to largest positive real number
-c XSL  RINF = FD05AD(5)
-      RINF = DLAMCH('Overflow')
-
-C Initialization
-      NUM = 0
-      BV = RINF
-      DO 10 K = 1,N
-        IPERM(K) = 0
-        JPERM(K) = 0
-        PR(K) = IP(K)
-        D(K) = ZERO
-   10 CONTINUE
-C Scan columns of matrix;
-      DO 20 J = 1,N
-        A0 = MINONE
-        DO 30 K = IP(J),IP(J+1)-1
-          I = IRN(K)
-          AI = ABS(A(K))
-          IF (AI.GT.D(I)) D(I) = AI
-          IF (JPERM(J).NE.0) GO TO 30
-          IF (AI.GE.BV) THEN
-            A0 = BV
-            IF (IPERM(I).NE.0) GO TO 30
-            JPERM(J) = I 
-            IPERM(I) = J
-            NUM = NUM + 1
-          ELSE
-            IF (AI.LE.A0) GO TO 30
-            A0 = AI
-            I0 = I
-          ENDIF
-   30   CONTINUE
-        IF (A0.NE.MINONE .AND. A0.LT.BV) THEN
-          BV = A0
-          IF (IPERM(I0).NE.0) GO TO 20
-          IPERM(I0) = J
-          JPERM(J) = I0
-          NUM = NUM + 1
-        ENDIF
-   20 CONTINUE
-C Update BV with smallest of all the largest maximum absolute values 
-C of the rows.
-      DO 25 I = 1,N
-        BV = MIN(BV,D(I))
-   25 CONTINUE
-      IF (NUM.EQ.N) GO TO 1000
-C Rescan unassigned columns; improve initial assignment
-      DO 95 J = 1,N
-        IF (JPERM(J).NE.0) GO TO 95
-        DO 50 K = IP(J),IP(J+1)-1
-          I = IRN(K)
-          AI = ABS(A(K))
-          IF (AI.LT.BV) GO TO 50
-          IF (IPERM(I).EQ.0) GO TO 90
-          JJ = IPERM(I)
-          KK1 = PR(JJ)
-          KK2 = IP(JJ+1) - 1
-          IF (KK1.GT.KK2) GO TO 50
-          DO 70 KK = KK1,KK2
-            II = IRN(KK)
-            IF (IPERM(II).NE.0) GO TO 70
-            IF (ABS(A(KK)).GE.BV) GO TO 80
-   70     CONTINUE
-          PR(JJ) = KK2 + 1
-   50   CONTINUE
-        GO TO 95
-   80   JPERM(JJ) = II
-        IPERM(II) = JJ
-        PR(JJ) = KK + 1
-   90   NUM = NUM + 1
-        JPERM(J) = I
-        IPERM(I) = J
-        PR(J) = K + 1
-   95 CONTINUE
-      IF (NUM.EQ.N) GO TO 1000
-
-C Prepare for main loop
-      DO 99 I = 1,N
-        D(I) = MINONE
-        L(I) = 0
-   99 CONTINUE
-
-C Main loop ... each pass round this loop is similar to Dijkstra's 
-C algorithm for solving the single source shortest path problem 
-
-      DO 100 JORD = 1,N
-
-        IF (JPERM(JORD).NE.0) GO TO 100
-        QLEN = 0
-        LOW = N + 1
-        UP = N + 1
-C CSP is cost of shortest path to any unassigned row
-C ISP is matrix position of unassigned row element in shortest path
-C JSP is column index of unassigned row element in shortest path
-        CSP = MINONE
-C Build shortest path tree starting from unassigned column JORD
-        J = JORD
-        PR(J) = -1
-
-C Scan column J
-        DO 115 K = IP(J),IP(J+1)-1
-          I = IRN(K)
-          DNEW = ABS(A(K))
-          IF (CSP.GE.DNEW) GO TO 115
-          IF (IPERM(I).EQ.0) THEN
-C Row I is unassigned; update shortest path info
-            CSP = DNEW
-            ISP = I
-            JSP = J
-            IF (CSP.GE.BV) GO TO 160
-          ELSE
-            D(I) = DNEW
-            IF (DNEW.GE.BV) THEN
-C Add row I to Q2
-              LOW = LOW - 1
-              Q(LOW) = I
-            ELSE
-C Add row I to Q, and push it 
-              QLEN = QLEN + 1
-              L(I) = QLEN
-              CALL MC64DD(I,N,Q,D,L,1)
-            ENDIF
-            JJ = IPERM(I)
-            PR(JJ) = J
-          ENDIF
-  115   CONTINUE
-
-        DO 150 JDUM = 1,NUM
-C If Q2 is empty, extract new rows from Q
-          IF (LOW.EQ.UP) THEN
-            IF (QLEN.EQ.0) GO TO 160
-            I = Q(1)
-            IF (CSP.GE.D(I)) GO TO 160
-            BV = D(I)
-            DO 152 IDUM = 1,N
-              CALL MC64ED(QLEN,N,Q,D,L,1)
-              L(I) = 0
-              LOW = LOW - 1
-              Q(LOW) = I
-              IF (QLEN.EQ.0) GO TO 153
-              I = Q(1)
-              IF (D(I).NE.BV) GO TO 153
-  152       CONTINUE
-C End of dummy loop; this point is never reached
-          ENDIF
-C Move row Q0 
-  153     UP = UP - 1
-          Q0 = Q(UP)
-          DQ0 = D(Q0)
-          L(Q0) = UP
-C Scan column that matches with row Q0
-          J = IPERM(Q0)
-          DO 155 K = IP(J),IP(J+1)-1
-            I = IRN(K)
-C Update D(I) 
-            IF (L(I).GE.UP) GO TO 155
-            DNEW = MIN(DQ0,ABS(A(K)))
-            IF (CSP.GE.DNEW) GO TO 155
-            IF (IPERM(I).EQ.0) THEN
-C Row I is unassigned; update shortest path info
-              CSP = DNEW
-              ISP = I
-              JSP = J
-              IF (CSP.GE.BV) GO TO 160
-            ELSE
-              DI = D(I)
-              IF (DI.GE.BV .OR. DI.GE.DNEW) GO TO 155
-              D(I) = DNEW
-              IF (DNEW.GE.BV) THEN
-C Delete row I from Q (if necessary); add row I to Q2
-                IF (DI.NE.MINONE)
-     *            CALL MC64FD(L(I),QLEN,N,Q,D,L,1)
-                L(I) = 0
-                LOW = LOW - 1
-                Q(LOW) = I
-              ELSE
-C Add row I to Q (if necessary); push row I up Q
-                IF (DI.EQ.MINONE) THEN
-                  QLEN = QLEN + 1
-                  L(I) = QLEN
-                ENDIF
-                CALL MC64DD(I,N,Q,D,L,1)
-              ENDIF
-C Update tree
-              JJ = IPERM(I)
-              PR(JJ) = J
-            ENDIF
-  155     CONTINUE
-  150   CONTINUE
-       
-C If CSP = MINONE, no augmenting path is found
-  160   IF (CSP.EQ.MINONE) GO TO 190
-C Update bottleneck value
-        BV = MIN(BV,CSP)
-C Find augmenting path by tracing backward in PR; update IPERM,JPERM
-        NUM = NUM + 1
-        I = ISP
-        J = JSP
-        DO 170 JDUM = 1,NUM+1
-          I0 = JPERM(J)
-          JPERM(J) = I
-          IPERM(I) = J
-          J = PR(J)
-          IF (J.EQ.-1) GO TO 190
-          I = I0
-  170   CONTINUE
-C End of dummy loop; this point is never reached
-  190   DO 191 KK = UP,N
-          I = Q(KK)
-          D(I) = MINONE
-          L(I) = 0
-  191   CONTINUE 
-        DO 192 KK = LOW,UP-1
-          I = Q(KK)
-          D(I) = MINONE
-  192   CONTINUE
-        DO 193 KK = 1,QLEN
-          I = Q(KK)
-          D(I) = MINONE
-          L(I) = 0
-  193   CONTINUE
-
-  100 CONTINUE
-C End of main loop
-
-C BV is bottleneck value of final matching
-      IF (NUM.EQ.N) GO TO 1000
-
-C Matrix is structurally singular, complete IPERM.
-C JPERM, PR are work arrays
-      DO 300 J = 1,N
-        JPERM(J) = 0
-  300 CONTINUE
-      K = 0
-      DO 310 I = 1,N
-        IF (IPERM(I).EQ.0) THEN
-          K = K + 1
-          PR(K) = I
-        ELSE
-          J = IPERM(I)
-          JPERM(J) = I
-        ENDIF
-  310 CONTINUE
-      K = 0
-      DO 320 I = 1,N
-        IF (JPERM(I).NE.0) GO TO 320
-        K = K + 1
-        JDUM = PR(K)
-        IPERM(JDUM) = I
-  320 CONTINUE
-
- 1000 RETURN
-      END
-
-C**********************************************************************
-      SUBROUTINE MC64DD(I,N,Q,D,L,IWAY)
-      IMPLICIT NONE
-C
-C *** Copyright (c) 1999  Council for the Central Laboratory of the
-C     Research Councils                                             ***
-C *** Although every effort has been made to ensure robustness and  ***
-C *** reliability of the subroutines in this MC64 suite, we         ***
-C *** disclaim any liability arising through the use or misuse of   ***
-C *** any of the subroutines.                                       ***
-C *** Any problems?   Contact ...
-C     Iain Duff (I.Duff@rl.ac.uk) or Jacko Koster (jak@ii.uib.no)   ***
-C
-      INTEGER I,N,IWAY
-      INTEGER Q(N),L(N)
-      DOUBLE PRECISION D(N)
-
-C Variables N,Q,D,L are described in MC64B/BD
-C IF IWAY is equal to 1, then
-C node I is pushed from its current position upwards
-C IF IWAY is not equal to 1, then
-C node I is pushed from its current position downwards
-
-C Local variables and parameters
-      INTEGER IDUM,K,POS,POSK,QK
-      PARAMETER (K=2)
-      DOUBLE PRECISION DI
-
-      DI = D(I)
-      POS = L(I)
-C POS is index of current position of I in the tree
-      IF (IWAY.EQ.1) THEN
-        DO 10 IDUM = 1,N
-          IF (POS.LE.1) GO TO 20
-          POSK = POS/K
-          QK = Q(POSK)
-          IF (DI.LE.D(QK)) GO TO 20 
-          Q(POS) = QK
-          L(QK) = POS 
-          POS = POSK
-   10   CONTINUE
-C End of dummy loop; this point is never reached
-      ELSE
-        DO 15 IDUM = 1,N
-          IF (POS.LE.1) GO TO 20
-          POSK = POS/K
-          QK = Q(POSK)
-          IF (DI.GE.D(QK)) GO TO 20
-          Q(POS) = QK
-          L(QK) = POS
-          POS = POSK
-   15   CONTINUE
-C End of dummy loop; this point is never reached
-      ENDIF
-C End of dummy if; this point is never reached
-   20 Q(POS) = I
-      L(I) = POS
-
-      RETURN
-      END
- 
-C**********************************************************************
-      SUBROUTINE MC64ED(QLEN,N,Q,D,L,IWAY)
-      IMPLICIT NONE
-C
-C *** Copyright (c) 1999  Council for the Central Laboratory of the
-C     Research Councils                                             ***
-C *** Although every effort has been made to ensure robustness and  ***
-C *** reliability of the subroutines in this MC64 suite, we         ***
-C *** disclaim any liability arising through the use or misuse of   ***
-C *** any of the subroutines.                                       ***
-C *** Any problems?   Contact ...
-C     Iain Duff (I.Duff@rl.ac.uk) or Jacko Koster (jak@ii.uib.no)   ***
-C
-      INTEGER QLEN,N,IWAY
-      INTEGER Q(N),L(N)
-      DOUBLE PRECISION D(N)
-
-C Variables QLEN,N,Q,D,L are described in MC64B/BD (IWAY = 1) or
-C     MC64W/WD (IWAY = 2)
-C The root node is deleted from the binary heap.
-
-C Local variables and parameters
-      INTEGER I,IDUM,K,POS,POSK
-      PARAMETER (K=2)
-      DOUBLE PRECISION DK,DR,DI
-
-C Move last element to begin of Q
-      I = Q(QLEN)
-      DI = D(I)
-      QLEN = QLEN - 1
-      POS = 1
-      IF (IWAY.EQ.1) THEN
-        DO 10 IDUM = 1,N
-          POSK = K*POS
-          IF (POSK.GT.QLEN) GO TO 20
-          DK = D(Q(POSK))
-          IF (POSK.LT.QLEN) THEN
-            DR = D(Q(POSK+1))
-            IF (DK.LT.DR) THEN
-              POSK = POSK + 1
-              DK = DR
-            ENDIF
-          ENDIF
-          IF (DI.GE.DK) GO TO 20
-C Exchange old last element with larger priority child
-          Q(POS) = Q(POSK)
-          L(Q(POS)) = POS
-          POS = POSK
-   10   CONTINUE
-C End of dummy loop; this point is never reached
-      ELSE
-        DO 15 IDUM = 1,N
-          POSK = K*POS
-          IF (POSK.GT.QLEN) GO TO 20
-          DK = D(Q(POSK))
-          IF (POSK.LT.QLEN) THEN
-            DR = D(Q(POSK+1))
-            IF (DK.GT.DR) THEN
-              POSK = POSK + 1
-              DK = DR
-            ENDIF
-          ENDIF
-          IF (DI.LE.DK) GO TO 20
-C Exchange old last element with smaller child
-          Q(POS) = Q(POSK)
-          L(Q(POS)) = POS
-          POS = POSK
-   15   CONTINUE
-C End of dummy loop; this point is never reached
-      ENDIF
-C End of dummy if; this point is never reached
-   20 Q(POS) = I
-      L(I) = POS
-
-      RETURN
-      END
-
-C**********************************************************************
-      SUBROUTINE MC64FD(POS0,QLEN,N,Q,D,L,IWAY)
-      IMPLICIT NONE
-C
-C *** Copyright (c) 1999  Council for the Central Laboratory of the
-C     Research Councils                                             ***
-C *** Although every effort has been made to ensure robustness and  ***
-C *** reliability of the subroutines in this MC64 suite, we         ***
-C *** disclaim any liability arising through the use or misuse of   ***
-C *** any of the subroutines.                                       ***
-C *** Any problems?   Contact ...
-C     Iain Duff (I.Duff@rl.ac.uk) or Jacko Koster (jak@ii.uib.no)   ***
-C
-      INTEGER POS0,QLEN,N,IWAY
-      INTEGER Q(N),L(N)
-      DOUBLE PRECISION D(N)
-
-C Variables QLEN,N,Q,D,L are described in MC64B/BD (IWAY = 1) or
-C     MC64WD (IWAY = 2).
-C Move last element in the heap 
-
-      INTEGER I,IDUM,K,POS,POSK,QK
-      PARAMETER (K=2)
-      DOUBLE PRECISION DK,DR,DI
- 
-C Quick return, if possible
-      IF (QLEN.EQ.POS0) THEN
-        QLEN = QLEN - 1
-        RETURN
-      ENDIF
-
-C Move last element from queue Q to position POS0
-C POS is current position of node I in the tree
-      I = Q(QLEN)
-      DI = D(I)
-      QLEN = QLEN - 1
-      POS = POS0
-      IF (IWAY.EQ.1) THEN
-        DO 10 IDUM = 1,N
-          IF (POS.LE.1) GO TO 20
-          POSK = POS/K
-          QK = Q(POSK)
-          IF (DI.LE.D(QK)) GO TO 20 
-          Q(POS) = QK
-          L(QK) = POS 
-          POS = POSK
-   10   CONTINUE
-C End of dummy loop; this point is never reached
-   20   Q(POS) = I
-        L(I) = POS
-        DO 30 IDUM = 1,N
-          POSK = K*POS
-          IF (POSK.GT.QLEN) GO TO 40
-          DK = D(Q(POSK))
-          IF (POSK.LT.QLEN) THEN
-            DR = D(Q(POSK+1))
-            IF (DK.LT.DR) THEN
-              POSK = POSK + 1
-              DK = DR
-            ENDIF
-          ENDIF
-          IF (DI.GE.DK) GO TO 40
-          QK = Q(POSK)
-          Q(POS) = QK
-          L(QK) = POS
-          POS = POSK
-   30   CONTINUE
-C End of dummy loop; this point is never reached
-      ELSE
-        DO 32 IDUM = 1,N
-          IF (POS.LE.1) GO TO 34
-          POSK = POS/K
-          QK = Q(POSK)
-          IF (DI.GE.D(QK)) GO TO 34 
-          Q(POS) = QK
-          L(QK) = POS 
-          POS = POSK
-   32   CONTINUE
-C End of dummy loop; this point is never reached
-   34   Q(POS) = I
-        L(I) = POS
-        DO 36 IDUM = 1,N
-          POSK = K*POS
-          IF (POSK.GT.QLEN) GO TO 40
-          DK = D(Q(POSK))
-          IF (POSK.LT.QLEN) THEN
-            DR = D(Q(POSK+1))
-            IF (DK.GT.DR) THEN
-              POSK = POSK + 1
-              DK = DR
-            ENDIF
-          ENDIF
-          IF (DI.LE.DK) GO TO 40
-          QK = Q(POSK)
-          Q(POS) = QK
-          L(QK) = POS
-          POS = POSK
-   36   CONTINUE
-C End of dummy loop; this point is never reached
-      ENDIF
-C End of dummy if; this point is never reached
-   40 Q(POS) = I
-      L(I) = POS
-
-      RETURN
-      END
-
-C**********************************************************************
-      SUBROUTINE MC64RD(N,NE,IP,IRN,A)
-      IMPLICIT NONE
-C
-C *** Copyright (c) 1999  Council for the Central Laboratory of the
-C     Research Councils                                             ***
-C *** Although every effort has been made to ensure robustness and  ***
-C *** reliability of the subroutines in this MC64 suite, we         ***
-C *** disclaim any liability arising through the use or misuse of   ***
-C *** any of the subroutines.                                       ***
-C *** Any problems?   Contact ...
-C     Iain Duff (I.Duff@rl.ac.uk) or Jacko Koster (jak@ii.uib.no)   ***
-C
-      INTEGER N,NE
-      INTEGER IP(N+1),IRN(NE)
-      DOUBLE PRECISION A(NE)
-
-C This subroutine sorts the entries in each column of the 
-C sparse matrix (defined by N,NE,IP,IRN,A) by decreasing
-C numerical value.
-
-C Local constants
-      INTEGER THRESH,TDLEN
-      PARAMETER (THRESH=15,TDLEN=50)
-C Local variables
-      INTEGER J,IPJ,K,LEN,R,S,HI,FIRST,MID,LAST,TD
-      DOUBLE PRECISION HA,KEY
-C Local arrays
-      INTEGER TODO(TDLEN)
-      
-      DO 100 J = 1,N
-        LEN = IP(J+1) - IP(J)
-        IF (LEN.LE.1) GO TO 100
-        IPJ = IP(J)
-
-C Sort array roughly with partial quicksort
-        IF (LEN.LT.THRESH) GO TO 400
-        TODO(1) = IPJ
-        TODO(2) = IPJ + LEN
-        TD = 2
-  500   CONTINUE
-        FIRST = TODO(TD-1)
-        LAST = TODO(TD)
-C KEY is the smallest of two values present in interval [FIRST,LAST)
-        KEY = A((FIRST+LAST)/2)
-        DO 475 K = FIRST,LAST-1
-          HA = A(K)
-          IF (HA.EQ.KEY) GO TO 475
-          IF (HA.GT.KEY) GO TO 470
-          KEY = HA
-          GO TO 470
-  475   CONTINUE
-C Only one value found in interval, so it is already sorted
-        TD = TD - 2
-        GO TO 425
-
-C Reorder interval [FIRST,LAST) such that entries before MID are gt KEY
-  470   MID = FIRST
-        DO 450 K = FIRST,LAST-1
-          IF (A(K).LE.KEY) GO TO 450
-          HA = A(MID)
-          A(MID) = A(K)
-          A(K) = HA
-          HI = IRN(MID)
-          IRN(MID) = IRN(K)
-          IRN(K) = HI
-          MID = MID + 1
-  450   CONTINUE
-C Both subintervals [FIRST,MID), [MID,LAST) are nonempty
-C Stack the longest of the two subintervals first
-        IF (MID-FIRST.GE.LAST-MID) THEN
-          TODO(TD+2) = LAST
-          TODO(TD+1) = MID
-          TODO(TD) = MID
-C          TODO(TD-1) = FIRST
-        ELSE
-          TODO(TD+2) = MID
-          TODO(TD+1) = FIRST
-          TODO(TD) = LAST
-          TODO(TD-1) = MID
-        ENDIF
-        TD = TD + 2
-
-  425   CONTINUE
-        IF (TD.EQ.0) GO TO 400 
-C There is still work to be done
-        IF (TODO(TD)-TODO(TD-1).GE.THRESH) GO TO 500
-C Next interval is already short enough for straightforward insertion
-        TD = TD - 2
-        GO TO 425
-
-C Complete sorting with straightforward insertion
-  400   DO 200 R = IPJ+1,IPJ+LEN-1
-          IF (A(R-1) .LT. A(R)) THEN
-            HA = A(R)
-            HI = IRN(R)
-            A(R) = A(R-1)
-            IRN(R) = IRN(R-1)
-            DO 300 S = R-1,IPJ+1,-1
-              IF (A(S-1) .LT. HA) THEN
-                A(S) = A(S-1)
-                IRN(S) = IRN(S-1)
-              ELSE
-                A(S) = HA
-                IRN(S) = HI
-                GO TO 200 
-              END IF
-  300       CONTINUE
-            A(IPJ) = HA
-            IRN(IPJ) = HI
-          END IF
-  200   CONTINUE
-
-  100 CONTINUE
-
-      RETURN
-      END
-
-C**********************************************************************
-      SUBROUTINE MC64SD(N,NE,IP,IRN,A,IPERM,NUMX,
-     &           W,LEN,LENL,LENH,FC,IW,IW4)
-      IMPLICIT NONE
-C
-C *** Copyright (c) 1999  Council for the Central Laboratory of the
-C     Research Councils                                             ***
-C *** Although every effort has been made to ensure robustness and  ***
-C *** reliability of the subroutines in this MC64 suite, we         ***
-C *** disclaim any liability arising through the use or misuse of   ***
-C *** any of the subroutines.                                       ***
-C *** Any problems?   Contact ...
-C     Iain Duff (I.Duff@rl.ac.uk) or Jacko Koster (jak@ii.uib.no)   ***
-C
-      INTEGER N,NE,NUMX
-      INTEGER IP(N+1),IRN(NE),IPERM(N), 
-     &        W(N),LEN(N),LENL(N),LENH(N),FC(N),IW(N),IW4(4*N)
-      DOUBLE PRECISION A(NE)
-
-C N, NE, IP, IRN, are described in MC64A/AD.
-C A is a REAL (DOUBLE PRECISION in the D-version) array of length NE.
-C   A(K), K=1..NE, must be set to the value of the entry that 
-C   corresponds to IRN(k). The entries in each column must be 
-C   non-negative and ordered by decreasing value. 
-C IPERM is an INTEGER array of length N. On exit, it contains the 
-C   bottleneck matching: IPERM(I) - 0 or row I is matched to column 
-C   IPERM(I).
-C NUMX is an INTEGER variable. On exit, it contains the cardinality 
-C   of the matching stored in IPERM.
-C IW is an INTEGER work array of length 10N.
-
-C FC is an integer array of length N that contains the list of 
-C   unmatched columns. 
-C LEN(J), LENL(J), LENH(J) are integer arrays of length N that point 
-C   to entries in matrix column J.
-C   In the matrix defined by the column parts IP(J)+LENL(J) we know 
-C   a matching does not exist; in the matrix defined by the column 
-C   parts IP(J)+LENH(J) we know one exists.
-C   LEN(J) lies between LENL(J) and LENH(J) and determines the matrix
-C   that is tested for a maximum matching.
-C W is an integer array of length N and contains the indices of the 
-C   columns for which LENL ne LENH.
-C WLEN is number of indices stored in array W.
-C IW is integer work array of length N.
-C IW4 is integer work array of length 4N used by MC64U/UD.
-
-      INTEGER NUM,NVAL,WLEN,II,I,J,K,L,CNT,MOD,IDUM1,IDUM2,IDUM3
-      DOUBLE PRECISION BVAL,BMIN,BMAX,RINF
-c      EXTERNAL FD05AD,MC64QD,MC64UD
-c      DOUBLE PRECISION FD05AD
-      EXTERNAL MC64QD,MC64UD, DLAMCH
-      DOUBLE PRECISION DLAMCH
-
-C BMIN and BMAX are such that a maximum matching exists for the input
-C   matrix in which all entries smaller than BMIN are dropped. 
-C   For BMAX, a maximum matching does not exist.
-C BVAL is a value between BMIN and BMAX.
-C CNT is the number of calls made to MC64U/UD so far.
-C NUM is the cardinality of last matching found.
-
-C Set RINF to largest positive real number
-c XSL      RINF = FD05AD(5)
-      RINF = DLAMCH('Overflow')
-
-C Compute a first maximum matching from scratch on whole matrix.
-      DO 20 J = 1,N
-        FC(J) = J
-        IW(J) = 0
-        LEN(J) = IP(J+1) - IP(J)
-   20 CONTINUE
-C The first call to MC64U/UD
-      CNT = 1
-      MOD = 1
-      NUMX = 0
-      CALL MC64UD(CNT,MOD,N,IRN,NE,IP,LEN,FC,IW,NUMX,N,
-     &            IW4(1),IW4(N+1),IW4(2*N+1),IW4(3*N+1))
-
-C IW contains a maximum matching of length NUMX.
-      NUM = NUMX
-
-      IF (NUM.NE.N) THEN
-C Matrix is structurally singular
-        BMAX = RINF
-      ELSE
-C Matrix is structurally nonsingular, NUM=NUMX=N;
-C Set BMAX just above the smallest of all the maximum absolute 
-C values of the columns
-        BMAX = RINF
-        DO 30 J = 1,N
-          BVAL = 0.0
-          DO 25 K = IP(J),IP(J+1)-1
-            IF (A(K).GT.BVAL) BVAL = A(K)
-   25     CONTINUE
-          IF (BVAL.LT.BMAX) BMAX = BVAL
-   30   CONTINUE
-        BMAX = 1.001 * BMAX
-      ENDIF
-
-C Initialize BVAL,BMIN
-      BVAL = 0.0
-      BMIN = 0.0
-C Initialize LENL,LEN,LENH,W,WLEN according to BMAX.
-C Set LEN(J), LENH(J) just after last entry in column J. 
-C Set LENL(J) just after last entry in column J with value ge BMAX.
-      WLEN = 0
-      DO 48 J = 1,N
-        L = IP(J+1) - IP(J)
-        LENH(J) = L
-        LEN(J) = L
-        DO 45 K = IP(J),IP(J+1)-1
-          IF (A(K).LT.BMAX) GO TO 46
-   45   CONTINUE
-C Column J is empty or all entries are ge BMAX
-        K = IP(J+1)
-   46   LENL(J) = K - IP(J)
-C Add J to W if LENL(J) ne LENH(J)
-        IF (LENL(J).EQ.L) GO TO 48
-        WLEN = WLEN + 1
-        W(WLEN) = J
-   48 CONTINUE
-
-C Main loop
-      DO 90 IDUM1 = 1,NE
-        IF (NUM.EQ.NUMX) THEN
-C We have a maximum matching in IW; store IW in IPERM
-          DO 50 I = 1,N
-            IPERM(I) = IW(I)
-   50     CONTINUE
-C Keep going round this loop until matching IW is no longer maximum.
-          DO 80 IDUM2 = 1,NE
-            BMIN = BVAL
-            IF (BMAX .EQ. BMIN) GO TO 99
-C Find splitting value BVAL
-            CALL MC64QD(IP,LENL,LEN,W,WLEN,A,NVAL,BVAL)
-            IF (NVAL.LE.1) GO TO 99 
-C Set LEN such that all matrix entries with value lt BVAL are 
-C discarded. Store old LEN in LENH. Do this for all columns W(K).
-C Each step, either K is incremented or WLEN is decremented.
-            K = 1
-            DO 70 IDUM3 = 1,N
-              IF (K.GT.WLEN) GO TO 71
-              J = W(K)
-              DO 55 II = IP(J)+LEN(J)-1,IP(J)+LENL(J),-1
-                IF (A(II).GE.BVAL) GO TO 60 
-                I = IRN(II)
-                IF (IW(I).NE.J) GO TO 55
-C Remove entry from matching
-                IW(I) = 0
-                NUM = NUM - 1
-                FC(N-NUM) = J
-   55         CONTINUE
-   60         LENH(J) = LEN(J)
-C IP(J)+LEN(J)-1 is last entry in column ge BVAL
-              LEN(J) = II - IP(J) + 1
-C If LENH(J) = LENL(J), remove J from W
-              IF (LENL(J).EQ.LENH(J)) THEN
-                W(K) = W(WLEN)
-                WLEN = WLEN - 1
-              ELSE
-                K = K + 1
-              ENDIF
-   70       CONTINUE
-   71       IF (NUM.LT.NUMX) GO TO 81
-   80     CONTINUE
-C End of dummy loop; this point is never reached
-C Set mode for next call to MC64U/UD
-   81     MOD = 1
-        ELSE
-C We do not have a maximum matching in IW. 
-          BMAX = BVAL
-C BMIN is the bottleneck value of a maximum matching;
-C for BMAX the matching is not maximum, so BMAX>BMIN
-C          IF (BMAX .EQ. BMIN) GO TO 99
-C Find splitting value BVAL
-          CALL MC64QD(IP,LEN,LENH,W,WLEN,A,NVAL,BVAL)
-          IF (NVAL.EQ.0. OR. BVAL.EQ.BMIN) GO TO 99 
-C Set LEN such that all matrix entries with value ge BVAL are
-C inside matrix. Store old LEN in LENL. Do this for all columns W(K).
-C Each step, either K is incremented or WLEN is decremented.
-          K = 1
-          DO 87 IDUM3 = 1,N
-            IF (K.GT.WLEN) GO TO 88
-            J = W(K)
-            DO 85 II = IP(J)+LEN(J),IP(J)+LENH(J)-1
-              IF (A(II).LT.BVAL) GO TO 86
-   85       CONTINUE
-   86       LENL(J) = LEN(J)
-            LEN(J) = II - IP(J)
-            IF (LENL(J).EQ.LENH(J)) THEN
-              W(K) = W(WLEN)
-              WLEN = WLEN - 1
-            ELSE
-              K = K + 1
-            ENDIF
-   87     CONTINUE
-C End of dummy loop; this point is never reached
-C Set mode for next call to MC64U/UD
-   88     MOD = 0
-        ENDIF
-        CNT = CNT + 1
-        CALL MC64UD(CNT,MOD,N,IRN,NE,IP,LEN,FC,IW,NUM,NUMX,
-     &            IW4(1),IW4(N+1),IW4(2*N+1),IW4(3*N+1))
-
-C IW contains maximum matching of length NUM
-   90 CONTINUE 
-C End of dummy loop; this point is never reached
-
-C BMIN is bottleneck value of final matching
-   99 IF (NUMX.EQ.N) GO TO 1000
-C The matrix is structurally singular, complete IPERM
-C W, IW are work arrays
-      DO 300 J = 1,N
-        W(J) = 0
-  300 CONTINUE
-      K = 0
-      DO 310 I = 1,N
-        IF (IPERM(I).EQ.0) THEN
-          K = K + 1
-          IW(K) = I
-        ELSE
-          J = IPERM(I)
-          W(J) = I
-        ENDIF
-  310 CONTINUE
-      K = 0
-      DO 320 J = 1,N
-        IF (W(J).NE.0) GO TO 320
-        K = K + 1
-        IDUM1 = IW(K)
-        IPERM(IDUM1) = J
-  320 CONTINUE
-
- 1000 RETURN
-      END
-
-C**********************************************************************
-      SUBROUTINE MC64QD(IP,LENL,LENH,W,WLEN,A,NVAL,VAL)
-      IMPLICIT NONE
-C
-C *** Copyright (c) 1999  Council for the Central Laboratory of the
-C     Research Councils                                             ***
-C *** Although every effort has been made to ensure robustness and  ***
-C *** reliability of the subroutines in this MC64 suite, we         ***
-C *** disclaim any liability arising through the use or misuse of   ***
-C *** any of the subroutines.                                       ***
-C *** Any problems?   Contact ...
-C     Iain Duff (I.Duff@rl.ac.uk) or Jacko Koster (jak@ii.uib.no)   ***
-C
-      INTEGER WLEN,NVAL
-      INTEGER IP(*),LENL(*),LENH(*),W(*)
-      DOUBLE PRECISION A(*),VAL
-
-C This routine searches for at most XX different numerical values 
-C in the columns W(1:WLEN). XX>=2. 
-C Each column J is scanned between IP(J)+LENL(J) and IP(J)+LENH(J)-1 
-C until XX values are found or all columns have been considered.
-C On output, NVAL is the number of different values that is found 
-C and SPLIT(1:NVAL) contains the values in decreasing order.
-C If NVAL > 0, the routine returns VAL = SPLIT((NVAL+1)/2). 
-C
-      INTEGER XX,J,K,II,S,POS
-      PARAMETER (XX=10)
-      DOUBLE PRECISION SPLIT(XX),HA
-
-C Scan columns in W(1:WLEN). For each encountered value, if value not
-C already present in SPLIT(1:NVAL), insert value such that SPLIT 
-C remains sorted by decreasing value. 
-C The sorting is done by straightforward insertion; therefore the use
-C of this routine should be avoided for large XX (XX < 20).
-      NVAL = 0 
-      DO 10 K = 1,WLEN
-        J = W(K)
-        DO 15 II = IP(J)+LENL(J),IP(J)+LENH(J)-1
-          HA = A(II)
-          IF (NVAL.EQ.0) THEN
-            SPLIT(1) = HA
-            NVAL = 1
-          ELSE
-C Check presence of HA in SPLIT
-            DO 20 S = NVAL,1,-1
-              IF (SPLIT(S).EQ.HA) GO TO 15
-              IF (SPLIT(S).GT.HA) THEN
-                POS = S + 1
-                GO TO 21
-              ENDIF
-  20        CONTINUE
-            POS = 1
-C The insertion 
-  21        DO 22 S = NVAL,POS,-1
-              SPLIT(S+1) = SPLIT(S)
-  22        CONTINUE
-            SPLIT(POS) = HA
-            NVAL = NVAL + 1
-          ENDIF
-C Exit loop if XX values are found
-          IF (NVAL.EQ.XX) GO TO 11
-  15    CONTINUE
-  10  CONTINUE
-C Determine VAL
-  11  IF (NVAL.GT.0) VAL = SPLIT((NVAL+1)/2)
-
-      RETURN
-      END
-      
-C**********************************************************************
-      SUBROUTINE MC64UD(ID,MOD,N,IRN,LIRN,IP,LENC,FC,IPERM,NUM,NUMX,
-     &           PR,ARP,CV,OUT)
-      IMPLICIT NONE
-C
-C *** Copyright (c) 1999  Council for the Central Laboratory of the
-C     Research Councils                                             ***
-C *** Although every effort has been made to ensure robustness and  ***
-C *** reliability of the subroutines in this MC64 suite, we         ***
-C *** disclaim any liability arising through the use or misuse of   ***
-C *** any of the subroutines.                                       ***
-C *** Any problems?   Contact ...
-C     Iain Duff (I.Duff@rl.ac.uk) or Jacko Koster (jak@ii.uib.no)   ***
-C
-      INTEGER ID,MOD,N,LIRN,NUM,NUMX
-      INTEGER ARP(N),CV(N),IRN(LIRN),IP(N),
-     &        FC(N),IPERM(N),LENC(N),OUT(N),PR(N)
-
-C PR(J) is the previous column to J in the depth first search.
-C   Array PR is used as workspace in the sorting algorithm.
-C Elements (I,IPERM(I)) I=1,..,N are entries at the end of the
-C   algorithm unless N assignments have not been made in which case
-C   N-NUM pairs (I,IPERM(I)) will not be entries in the matrix.
-C CV(I) is the most recent loop number (ID+JORD) at which row I
-C   was visited.
-C ARP(J) is the number of entries in column J which have been scanned 
-C   when looking for a cheap assignment.
-C OUT(J) is one less than the number of entries in column J which have 
-C   not been scanned during one pass through the main loop.
-C NUMX is maximum possible size of matching.
-
-      INTEGER I,II,IN1,IN2,J,J1,JORD,K,KK,LAST,NFC,
-     &        NUM0,NUM1,NUM2,ID0,ID1
-
-      IF (ID.EQ.1) THEN
-C The first call to MC64U/UD.
-C Initialize CV and ARP; parameters MOD, NUMX are not accessed
-        DO 5 I = 1,N
-          CV(I) = 0
-          ARP(I) = 0
-    5   CONTINUE
-        NUM1 = N
-        NUM2 = N
-      ELSE
-C Not the first call to MC64U/UD.
-C Re-initialize ARP if entries were deleted since last call to MC64U/UD
-        IF (MOD.EQ.1) THEN
-          DO 8 I = 1,N
-            ARP(I) = 0
-    8     CONTINUE
-        ENDIF
-        NUM1 = NUMX
-        NUM2 = N - NUMX
-      ENDIF
-      NUM0 = NUM
-
-C NUM0 is size of input matching
-C NUM1 is maximum possible size of matching
-C NUM2 is maximum allowed number of unassigned rows/columns
-C NUM is size of current matching
-
-C Quick return if possible
-C      IF (NUM.EQ.N) GO TO 199
-C NFC is number of rows/columns that could not be assigned
-      NFC = 0
-C Integers ID0+1 to ID0+N are unique numbers for call ID to MC64U/UD,
-C so 1st call uses 1..N, 2nd call uses N+1..2N, etc
-      ID0 = (ID-1)*N 
-
-C Main loop. Each pass round this loop either results in a new
-C assignment or gives a column with no assignment
-
-      DO 100 JORD = NUM0+1,N
-
-C Each pass uses unique number ID1
-        ID1 = ID0 + JORD
-C J is unmatched column
-        J = FC(JORD-NUM0)
-        PR(J) = -1
-        DO 70 K = 1,JORD
-C Look for a cheap assignment
-          IF (ARP(J).GE.LENC(J)) GO TO 30
-          IN1 = IP(J) + ARP(J)
-          IN2 = IP(J) + LENC(J) - 1
-          DO 20 II = IN1,IN2
-            I = IRN(II)
-            IF (IPERM(I).EQ.0) GO TO 80
-   20     CONTINUE
-C No cheap assignment in row
-          ARP(J) = LENC(J)
-C Begin looking for assignment chain starting with row J
-   30     OUT(J) = LENC(J) - 1
-C Inner loop.  Extends chain by one or backtracks
-          DO 60 KK = 1,JORD
-            IN1 = OUT(J)
-            IF (IN1.LT.0) GO TO 50
-            IN2 = IP(J) + LENC(J) - 1
-            IN1 = IN2 - IN1
-C Forward scan
-            DO 40 II = IN1,IN2
-              I = IRN(II)
-              IF (CV(I).EQ.ID1) GO TO 40
-C Column J has not yet been accessed during this pass
-              J1 = J
-              J = IPERM(I)
-              CV(I) = ID1
-              PR(J) = J1
-              OUT(J1) = IN2 - II - 1
-              GO TO 70
-   40       CONTINUE
-C Backtracking step.
-   50       J1 = PR(J)
-            IF (J1.EQ.-1) THEN
-C No augmenting path exists for column J.
-              NFC = NFC + 1
-              FC(NFC) = J
-              IF (NFC.GT.NUM2) THEN
-C A matching of maximum size NUM1 is not possible
-                LAST = JORD
-                GO TO 101
-              ENDIF
-              GO TO 100
-            ENDIF
-            J = J1
-   60     CONTINUE
-C End of dummy loop; this point is never reached
-   70   CONTINUE
-C End of dummy loop; this point is never reached
-
-C New assignment is made.
-   80   IPERM(I) = J
-        ARP(J) = II - IP(J) + 1
-        NUM = NUM + 1
-        DO 90 K = 1,JORD
-          J = PR(J)
-          IF (J.EQ.-1) GO TO 95
-          II = IP(J) + LENC(J) - OUT(J) - 2
-          I = IRN(II)
-          IPERM(I) = J
-   90   CONTINUE
-C End of dummy loop; this point is never reached
-
-   95   IF (NUM.EQ.NUM1) THEN
-C A matching of maximum size NUM1 is found
-          LAST = JORD
-          GO TO 101
-        ENDIF
-C
-  100 CONTINUE
-
-C All unassigned columns have been considered
-      LAST = N
-
-C Now, a transversal is computed or is not possible.
-C Complete FC before returning.
-  101 DO 110 JORD = LAST+1,N
-        NFC = NFC + 1
-        FC(NFC) = FC(JORD-NUM0)
-  110 CONTINUE
-
-C  199 RETURN
-      RETURN
-      END
-
-C**********************************************************************
-      SUBROUTINE MC64WD(N,NE,IP,IRN,A,IPERM,NUM,
-     &           JPERM,OUT,PR,Q,L,U,D) 
-      IMPLICIT NONE
-C
-C *** Copyright (c) 1999  Council for the Central Laboratory of the
-C     Research Councils                                             ***
-C *** Although every effort has been made to ensure robustness and  ***
-C *** reliability of the subroutines in this MC64 suite, we         ***
-C *** disclaim any liability arising through the use or misuse of   ***
-C *** any of the subroutines.                                       ***
-C *** Any problems?   Contact ...
-C     Iain Duff (I.Duff@rl.ac.uk) or Jacko Koster (jak@ii.uib.no)   ***
-C
-      INTEGER N,NE,NUM
-      INTEGER IP(N+1),IRN(NE),IPERM(N),
-     &        JPERM(N),OUT(N),PR(N),Q(N),L(N)
-      DOUBLE PRECISION A(NE),U(N),D(N)
-
-C N, NE, IP, IRN are described in MC64A/AD.
-C A is a REAL (DOUBLE PRECISION in the D-version) array of length NE.
-C   A(K), K=1..NE, must be set to the value of the entry that
-C   corresponds to IRN(K). It is not altered.
-C   All values A(K) must be non-negative.
-C IPERM is an INTEGER array of length N. On exit, it contains the 
-C   weighted matching: IPERM(I) = 0 or row I is matched to column 
-C   IPERM(I). 
-C NUM is an INTEGER variable. On exit, it contains the cardinality of 
-C   the matching stored in IPERM.
-C IW is an INTEGER work array of length 5N.
-C DW is a REAL (DOUBLE PRECISION in the D-version) array of length 2N.
-C   On exit, U = D(1:N) contains the dual row variable and 
-C   V = D(N+1:2N) contains the dual column variable. If the matrix 
-C   is structurally nonsingular (NUM = N), the following holds:
-C      U(I)+V(J) <= A(I,J)  if IPERM(I) |= J
-C      U(I)+V(J)  = A(I,J)  if IPERM(I)  = J
-C      U(I) = 0  if IPERM(I) = 0
-C      V(J) = 0  if there is no I for which IPERM(I) = J
-
-C Local variables
-      INTEGER I,I0,II,J,JJ,JORD,Q0,QLEN,JDUM,ISP,JSP,
-     &        K,K0,K1,K2,KK,KK1,KK2,UP,LOW
-      DOUBLE PRECISION CSP,DI,DMIN,DNEW,DQ0,VJ
-C Local parameters
-      DOUBLE PRECISION RINF,ZERO
-      PARAMETER (ZERO=0.0D+0)
-C External subroutines and/or functions
-c      EXTERNAL FD05AD,MC64DD,MC64ED,MC64FD
-c      DOUBLE PRECISION FD05AD
-      EXTERNAL MC64DD,MC64ED,MC64FD, DLAMCH
-      DOUBLE PRECISION DLAMCH
-
-
-C Set RINF to largest positive real number
-c XSL      RINF = FD05AD(5)
-      RINF = DLAMCH('Overflow')
-
-C Initialization
-      NUM = 0
-      DO 10 K = 1,N
-        U(K) = RINF
-        D(K) = ZERO
-        IPERM(K) = 0
-        JPERM(K) = 0
-        PR(K) = IP(K)
-        L(K) = 0
-   10 CONTINUE
-C Initialize U(I) 
-      DO 30 J = 1,N
-        DO 20 K = IP(J),IP(J+1)-1
-          I = IRN(K)
-          IF (A(K).GT.U(I)) GO TO 20
-          U(I) = A(K)
-          IPERM(I) = J
-          L(I) = K
-   20   CONTINUE
-   30 CONTINUE
-      DO 40 I = 1,N
-        J = IPERM(I)
-        IF (J.EQ.0) GO TO 40
-C Row I is not empty
-        IPERM(I) = 0
-        IF (JPERM(J).NE.0) GO TO 40
-C Assignment of column J to row I
-        NUM = NUM + 1
-        IPERM(I) = J
-        JPERM(J) = L(I)
-   40 CONTINUE
-      IF (NUM.EQ.N) GO TO 1000
-C Scan unassigned columns; improve assignment
-      DO 95 J = 1,N
-C JPERM(J) ne 0 iff column J is already assigned
-        IF (JPERM(J).NE.0) GO TO 95
-        K1 = IP(J)
-        K2 = IP(J+1) - 1
-C Continue only if column J is not empty
-        IF (K1.GT.K2) GO TO 95
-        VJ = RINF
-        DO 50 K = K1,K2
-          I = IRN(K)
-          DI = A(K) - U(I)
-          IF (DI.GT.VJ) GO TO 50
-          IF (DI.LT.VJ .OR. DI.EQ.RINF) GO TO 55
-          IF (IPERM(I).NE.0 .OR. IPERM(I0).EQ.0) GO TO 50
-   55     VJ = DI
-          I0 = I
-          K0 = K
-   50   CONTINUE
-        D(J) = VJ
-        K = K0
-        I = I0
-        IF (IPERM(I).EQ.0) GO TO 90
-        DO 60 K = K0,K2
-          I = IRN(K)
-          IF (A(K)-U(I).GT.VJ) GO TO 60 
-          JJ = IPERM(I)
-C Scan remaining part of assigned column JJ 
-          KK1 = PR(JJ)
-          KK2 = IP(JJ+1) - 1
-          IF (KK1.GT.KK2) GO TO 60
-          DO 70 KK = KK1,KK2
-            II = IRN(KK)
-            IF (IPERM(II).GT.0) GO TO 70
-            IF (A(KK)-U(II).LE.D(JJ)) GO TO 80
-   70     CONTINUE
-          PR(JJ) = KK2 + 1
-   60   CONTINUE
-        GO TO 95
-   80   JPERM(JJ) = KK
-        IPERM(II) = JJ
-        PR(JJ) = KK + 1
-   90   NUM = NUM + 1
-        JPERM(J) = K
-        IPERM(I) = J
-        PR(J) = K + 1
-   95 CONTINUE
-      IF (NUM.EQ.N) GO TO 1000
-  
-C Prepare for main loop
-      DO 99 I = 1,N
-        D(I) = RINF
-        L(I) = 0
-   99 CONTINUE
-
-C Main loop ... each pass round this loop is similar to Dijkstra's 
-C algorithm for solving the single source shortest path problem 
-
-      DO 100 JORD = 1,N
-
-        IF (JPERM(JORD).NE.0) GO TO 100
-C JORD is next unmatched column
-C DMIN is the length of shortest path in the tree
-        DMIN = RINF
-        QLEN = 0
-        LOW = N + 1
-        UP = N + 1
-C CSP is the cost of the shortest augmenting path to unassigned row
-C IRN(ISP). The corresponding column index is JSP.
-        CSP = RINF
-C Build shortest path tree starting from unassigned column (root) JORD
-        J = JORD
-        PR(J) = -1
-
-C Scan column J
-        DO 115 K = IP(J),IP(J+1)-1
-          I = IRN(K)
-          DNEW = A(K) - U(I)
-          IF (DNEW.GE.CSP) GO TO 115
-          IF (IPERM(I).EQ.0) THEN
-            CSP = DNEW
-            ISP = K
-            JSP = J
-          ELSE
-            IF (DNEW.LT.DMIN) DMIN = DNEW
-            D(I) = DNEW
-            QLEN = QLEN + 1
-            Q(QLEN) = K
-          ENDIF
-  115   CONTINUE
-C Initialize heap Q and Q2 with rows held in Q(1:QLEN)
-        Q0 = QLEN
-        QLEN = 0
-        DO 120 KK = 1,Q0
-          K = Q(KK)
-          I = IRN(K)
-          IF (CSP.LE.D(I)) THEN
-            D(I) = RINF
-            GO TO 120
-          ENDIF
-          IF (D(I).LE.DMIN) THEN
-            LOW = LOW - 1
-            Q(LOW) = I
-            L(I) = LOW
-          ELSE
-            QLEN = QLEN + 1
-            L(I) = QLEN
-            CALL MC64DD(I,N,Q,D,L,2)
-          ENDIF
-C Update tree
-          JJ = IPERM(I)
-          OUT(JJ) = K
-          PR(JJ) = J
-  120   CONTINUE
-
-        DO 150 JDUM = 1,NUM
-
-C If Q2 is empty, extract rows from Q 
-          IF (LOW.EQ.UP) THEN
-            IF (QLEN.EQ.0) GO TO 160
-            I = Q(1)
-            IF (D(I).GE.CSP) GO TO 160
-            DMIN = D(I)
-  152       CALL MC64ED(QLEN,N,Q,D,L,2)
-            LOW = LOW - 1
-            Q(LOW) = I
-            L(I) = LOW
-            IF (QLEN.EQ.0) GO TO 153
-            I = Q(1)
-            IF (D(I).GT.DMIN) GO TO 153
-            GO TO 152
-          ENDIF
-C Q0 is row whose distance D(Q0) to the root is smallest
-  153     Q0 = Q(UP-1)
-          DQ0 = D(Q0)
-C Exit loop if path to Q0 is longer than the shortest augmenting path 
-          IF (DQ0.GE.CSP) GO TO 160
-          UP = UP - 1
-
-C Scan column that matches with row Q0
-          J = IPERM(Q0)
-          VJ = DQ0 - A(JPERM(J)) + U(Q0)
-          DO 155 K = IP(J),IP(J+1)-1
-            I = IRN(K)
-            IF (L(I).GE.UP) GO TO 155
-C DNEW is new cost
-            DNEW = VJ + A(K)-U(I)
-C Do not update D(I) if DNEW ge cost of shortest path
-            IF (DNEW.GE.CSP) GO TO 155
-            IF (IPERM(I).EQ.0) THEN
-C Row I is unmatched; update shortest path info
-              CSP = DNEW
-              ISP = K
-              JSP = J
-            ELSE
-C Row I is matched; do not update D(I) if DNEW is larger
-              DI = D(I)
-              IF (DI.LE.DNEW) GO TO 155
-              IF (L(I).GE.LOW) GO TO 155
-              D(I) = DNEW
-              IF (DNEW.LE.DMIN) THEN
-                IF (L(I).NE.0) 
-     *            CALL MC64FD(L(I),QLEN,N,Q,D,L,2)
-                LOW = LOW - 1
-                Q(LOW) = I
-                L(I) = LOW
-              ELSE   
-                IF (L(I).EQ.0) THEN
-                  QLEN = QLEN + 1
-                  L(I) = QLEN
-                ENDIF
-                CALL MC64DD(I,N,Q,D,L,2)
-              ENDIF
-C Update tree
-              JJ = IPERM(I)
-              OUT(JJ) = K
-              PR(JJ) = J
-            ENDIF
-  155     CONTINUE
-  150   CONTINUE
-       
-C If CSP = RINF, no augmenting path is found
-  160   IF (CSP.EQ.RINF) GO TO 190
-C Find augmenting path by tracing backward in PR; update IPERM,JPERM
-        NUM = NUM + 1
-        I = IRN(ISP)
-        IPERM(I) = JSP
-        JPERM(JSP) = ISP
-        J = JSP
-        DO 170 JDUM = 1,NUM
-          JJ = PR(J) 
-          IF (JJ.EQ.-1) GO TO 180
-          K = OUT(J)
-          I = IRN(K)
-          IPERM(I) = JJ
-          JPERM(JJ) = K
-          J = JJ
-  170   CONTINUE
-C End of dummy loop; this point is never reached
-
-C Update U for rows in Q(UP:N)
-  180   DO 185 KK = UP,N
-          I = Q(KK)
-          U(I) = U(I) + D(I) - CSP
-  185   CONTINUE 
-  190   DO 191 KK = LOW,N
-          I = Q(KK)
-          D(I) = RINF
-          L(I) = 0
-  191   CONTINUE 
-        DO 193 KK = 1,QLEN
-          I = Q(KK)
-          D(I) = RINF
-          L(I) = 0
-  193   CONTINUE
-
-  100 CONTINUE
-C End of main loop
-
-
-C Set dual column variable in D(1:N)
- 1000 DO 200 J = 1,N
-        K = JPERM(J)
-        IF (K.NE.0) THEN
-          D(J) = A(K) - U(IRN(K))
-        ELSE
-          D(J) = ZERO
-        ENDIF
-        IF (IPERM(J).EQ.0) U(J) = ZERO
-  200 CONTINUE
-
-      IF (NUM.EQ.N) GO TO 1100
-
-C The matrix is structurally singular, complete IPERM.
-C JPERM, OUT are work arrays
-      DO 300 J = 1,N
-        JPERM(J) = 0
-  300 CONTINUE
-      K = 0
-      DO 310 I = 1,N
-        IF (IPERM(I).EQ.0) THEN
-          K = K + 1
-          OUT(K) = I
-        ELSE
-          J = IPERM(I)
-          JPERM(J) = I
-        ENDIF
-  310 CONTINUE
-      K = 0
-      DO 320 J = 1,N
-        IF (JPERM(J).NE.0) GO TO 320
-        K = K + 1
-        JDUM = OUT(K)
-        IPERM(JDUM) = J
-  320 CONTINUE
- 1100 RETURN
-      END
-
diff --git a/thirdparty/superlu/SuperLU_4.1/SRC/sgsitrf.c.bak b/thirdparty/superlu/SuperLU_4.1/SRC/sgsitrf.c.bak
deleted file mode 100644
index 3dfaadd..0000000
--- a/thirdparty/superlu/SuperLU_4.1/SRC/sgsitrf.c.bak
+++ /dev/null
@@ -1,626 +0,0 @@
-
-/*! @file sgsitf.c
- * \brief Computes an ILU factorization of a general sparse matrix
- *
- * <pre>
- * -- SuperLU routine (version 4.0) --
- * Lawrence Berkeley National Laboratory.
- * June 30, 2009
- * </pre>
- */
-
-#include "slu_sdefs.h"
-
-#ifdef DEBUG
-int num_drop_L;
-#endif
-
-/*! \brief
- *
- * <pre>
- * Purpose
- * =======
- *
- * SGSITRF computes an ILU factorization of a general sparse m-by-n
- * matrix A using partial pivoting with row interchanges.
- * The factorization has the form
- *     Pr * A = L * U
- * where Pr is a row permutation matrix, L is lower triangular with unit
- * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper
- * triangular (upper trapezoidal if A->nrow < A->ncol).
- *
- * See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * options (input) superlu_options_t*
- *	   The structure defines the input parameters to control
- *	   how the ILU decomposition will be performed.
- *
- * A	    (input) SuperMatrix*
- *	    Original matrix A, permuted by columns, of dimension
- *	    (A->nrow, A->ncol). The type of A can be:
- *	    Stype = SLU_NCP; Dtype = SLU_S; Mtype = SLU_GE.
- *
- * relax    (input) int
- *	    To control degree of relaxing supernodes. If the number
- *	    of nodes (columns) in a subtree of the elimination tree is less
- *	    than relax, this subtree is considered as one supernode,
- *	    regardless of the row structures of those columns.
- *
- * panel_size (input) int
- *	    A panel consists of at most panel_size consecutive columns.
- *
- * etree    (input) int*, dimension (A->ncol)
- *	    Elimination tree of A'*A.
- *	    Note: etree is a vector of parent pointers for a forest whose
- *	    vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
- *	    On input, the columns of A should be permuted so that the
- *	    etree is in a certain postorder.
- *
- * work     (input/output) void*, size (lwork) (in bytes)
- *	    User-supplied work space and space for the output data structures.
- *	    Not referenced if lwork = 0;
- *
- * lwork   (input) int
- *	   Specifies the size of work array in bytes.
- *	   = 0:  allocate space internally by system malloc;
- *	   > 0:  use user-supplied work array of length lwork in bytes,
- *		 returns error if space runs out.
- *	   = -1: the routine guesses the amount of space needed without
- *		 performing the factorization, and returns it in
- *		 *info; no other side effects.
- *
- * perm_c   (input) int*, dimension (A->ncol)
- *	    Column permutation vector, which defines the
- *	    permutation matrix Pc; perm_c[i] = j means column i of A is
- *	    in position j in A*Pc.
- *	    When searching for diagonal, perm_c[*] is applied to the
- *	    row subscripts of A, so that diagonal threshold pivoting
- *	    can find the diagonal of A, rather than that of A*Pc.
- *
- * perm_r   (input/output) int*, dimension (A->nrow)
- *	    Row permutation vector which defines the permutation matrix Pr,
- *	    perm_r[i] = j means row i of A is in position j in Pr*A.
- *	    If options->Fact = SamePattern_SameRowPerm, the pivoting routine
- *	       will try to use the input perm_r, unless a certain threshold
- *	       criterion is violated. In that case, perm_r is overwritten by
- *	       a new permutation determined by partial pivoting or diagonal
- *	       threshold pivoting.
- *	    Otherwise, perm_r is output argument;
- *
- * L	    (output) SuperMatrix*
- *	    The factor L from the factorization Pr*A=L*U; use compressed row
- *	    subscripts storage for supernodes, i.e., L has type:
- *	    Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
- *
- * U	    (output) SuperMatrix*
- *	    The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
- *	    storage scheme, i.e., U has types: Stype = SLU_NC,
- *	    Dtype = SLU_S, Mtype = SLU_TRU.
- *
- * stat     (output) SuperLUStat_t*
- *	    Record the statistics on runtime and floating-point operation count.
- *	    See slu_util.h for the definition of 'SuperLUStat_t'.
- *
- * info     (output) int*
- *	    = 0: successful exit
- *	    < 0: if info = -i, the i-th argument had an illegal value
- *	    > 0: if info = i, and i is
- *	       <= A->ncol: number of zero pivots. They are replaced by small
- *		  entries according to options->ILU_FillTol.
- *	       > A->ncol: number of bytes allocated when memory allocation
- *		  failure occurred, plus A->ncol. If lwork = -1, it is
- *		  the estimated amount of space needed, plus A->ncol.
- *
- * ======================================================================
- *
- * Local Working Arrays:
- * ======================
- *   m = number of rows in the matrix
- *   n = number of columns in the matrix
- *
- *   marker[0:3*m-1]: marker[i] = j means that node i has been
- *	reached when working on column j.
- *	Storage: relative to original row subscripts
- *	NOTE: There are 4 of them:
- *	      marker/marker1 are used for panel dfs, see (ilu_)dpanel_dfs.c;
- *	      marker2 is used for inner-factorization, see (ilu)_dcolumn_dfs.c;
- *	      marker_relax(has its own space) is used for relaxed supernodes.
- *
- *   parent[0:m-1]: parent vector used during dfs
- *	Storage: relative to new row subscripts
- *
- *   xplore[0:m-1]: xplore[i] gives the location of the next (dfs)
- *	unexplored neighbor of i in lsub[*]
- *
- *   segrep[0:nseg-1]: contains the list of supernodal representatives
- *	in topological order of the dfs. A supernode representative is the
- *	last column of a supernode.
- *	The maximum size of segrep[] is n.
- *
- *   repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a
- *	supernodal representative r, repfnz[r] is the location of the first
- *	nonzero in this segment.  It is also used during the dfs: repfnz[r]>0
- *	indicates the supernode r has been explored.
- *	NOTE: There are W of them, each used for one column of a panel.
- *
- *   panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below
- *	the panel diagonal. These are filled in during dpanel_dfs(), and are
- *	used later in the inner LU factorization within the panel.
- *	panel_lsub[]/dense[] pair forms the SPA data structure.
- *	NOTE: There are W of them.
- *
- *   dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
- *		   NOTE: there are W of them.
- *
- *   tempv[0:*]: real temporary used for dense numeric kernels;
- *	The size of this array is defined by NUM_TEMPV() in slu_util.h.
- *	It is also used by the dropping routine ilu_ddrop_row().
- * </pre>
- */
-
-void
-sgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
-	int *etree, void *work, int lwork, int *perm_c, int *perm_r,
-	SuperMatrix *L, SuperMatrix *U, SuperLUStat_t *stat, int *info)
-{
-    /* Local working arrays */
-    NCPformat *Astore;
-    int       *iperm_r = NULL; /* inverse of perm_r; used when
-				  options->Fact == SamePattern_SameRowPerm */
-    int       *iperm_c; /* inverse of perm_c */
-    int       *swap, *iswap; /* swap is used to store the row permutation
-				during the factorization. Initially, it is set
-				to iperm_c (row indeces of Pc*A*Pc').
-				iswap is the inverse of swap. After the
-				factorization, it is equal to perm_r. */
-    int       *iwork;
-    float   *swork;
-    int       *segrep, *repfnz, *parent, *xplore;
-    int       *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
-    int       *marker, *marker_relax;
-    float    *dense, *tempv;
-    int       *relax_end, *relax_fsupc;
-    float    *a;
-    int       *asub;
-    int       *xa_begin, *xa_end;
-    int       *xsup, *supno;
-    int       *xlsub, *xlusup, *xusub;
-    int       nzlumax;
-    float    *amax; 
-    float    drop_sum;
-    static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */
-    int       *iwork2;	   /* used by the second dropping rule */
-
-    /* Local scalars */
-    fact_t    fact = options->Fact;
-    double    diag_pivot_thresh = options->DiagPivotThresh;
-    double    drop_tol = options->ILU_DropTol; /* tau */
-    double    fill_ini = options->ILU_FillTol; /* tau^hat */
-    double    gamma = options->ILU_FillFactor;
-    int       drop_rule = options->ILU_DropRule;
-    milu_t    milu = options->ILU_MILU;
-    double    fill_tol;
-    int       pivrow;	/* pivotal row number in the original matrix A */
-    int       nseg1;	/* no of segments in U-column above panel row jcol */
-    int       nseg;	/* no of segments in each U-column */
-    register int jcol;
-    register int kcol;	/* end column of a relaxed snode */
-    register int icol;
-    register int i, k, jj, new_next, iinfo;
-    int       m, n, min_mn, jsupno, fsupc, nextlu, nextu;
-    int       w_def;	/* upper bound on panel width */
-    int       usepr, iperm_r_allocated = 0;
-    int       nnzL, nnzU;
-    int       *panel_histo = stat->panel_histo;
-    flops_t   *ops = stat->ops;
-
-    int       last_drop;/* the last column which the dropping rules applied */
-    int       quota;
-    int       nnzAj;	/* number of nonzeros in A(:,1:j) */
-    int       nnzLj, nnzUj;
-    double    tol_L = drop_tol, tol_U = drop_tol;
-    float zero = 0.0;
-
-    /* Executable */	   
-    iinfo    = 0;
-    m	     = A->nrow;
-    n	     = A->ncol;
-    min_mn   = SUPERLU_MIN(m, n);
-    Astore   = A->Store;
-    a	     = Astore->nzval;
-    asub     = Astore->rowind;
-    xa_begin = Astore->colbeg;
-    xa_end   = Astore->colend;
-
-    /* Allocate storage common to the factor routines */
-    *info = sLUMemInit(fact, work, lwork, m, n, Astore->nnz, panel_size,
-		       gamma, L, U, &Glu, &iwork, &swork);
-    if ( *info ) return;
-
-    xsup    = Glu.xsup;
-    supno   = Glu.supno;
-    xlsub   = Glu.xlsub;
-    xlusup  = Glu.xlusup;
-    xusub   = Glu.xusub;
-
-    SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
-	     &repfnz, &panel_lsub, &marker_relax, &marker);
-    sSetRWork(m, panel_size, swork, &dense, &tempv);
-
-    usepr = (fact == SamePattern_SameRowPerm);
-    if ( usepr ) {
-	/* Compute the inverse of perm_r */
-	iperm_r = (int *) intMalloc(m);
-	for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
-	iperm_r_allocated = 1;
-    }
-
-    iperm_c = (int *) intMalloc(n);
-    for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
-    swap = (int *)intMalloc(n);
-    for (k = 0; k < n; k++) swap[k] = iperm_c[k];
-    iswap = (int *)intMalloc(n);
-    for (k = 0; k < n; k++) iswap[k] = perm_c[k];
-    amax = (float *) floatMalloc(panel_size);
-    if (drop_rule & DROP_SECONDARY)
-	iwork2 = (int *)intMalloc(n);
-    else
-	iwork2 = NULL;
-
-    nnzAj = 0;
-    nnzLj = 0;
-    nnzUj = 0;
-    last_drop = SUPERLU_MAX(min_mn - 2 * sp_ienv(7), (int)(min_mn * 0.95));
-
-    /* Identify relaxed snodes */
-    relax_end = (int *) intMalloc(n);
-    relax_fsupc = (int *) intMalloc(n);
-    if ( options->SymmetricMode == YES )
-	ilu_heap_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc);
-    else
-	ilu_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc);
-
-    ifill (perm_r, m, EMPTY);
-    ifill (marker, m * NO_MARKER, EMPTY);
-    supno[0] = -1;
-    xsup[0]  = xlsub[0] = xusub[0] = xlusup[0] = 0;
-    w_def    = panel_size;
-
-    /* Mark the rows used by relaxed supernodes */
-    ifill (marker_relax, m, EMPTY);
-    i = mark_relax(m, relax_end, relax_fsupc, xa_begin, xa_end,
-	         asub, marker_relax);
-#if ( PRNTlevel >= 1)
-    printf("%d relaxed supernodes.\n", i);
-#endif
-
-    /*
-     * Work on one "panel" at a time. A panel is one of the following:
-     *	   (a) a relaxed supernode at the bottom of the etree, or
-     *	   (b) panel_size contiguous columns, defined by the user
-     */
-    for (jcol = 0; jcol < min_mn; ) {
-
-	if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
-	    kcol = relax_end[jcol];	  /* end of the relaxed snode */
-	    panel_histo[kcol-jcol+1]++;
-
-	    /* Drop small rows in the previous supernode. */
-	    if (jcol > 0 && jcol < last_drop) {
-		int first = xsup[supno[jcol - 1]];
-		int last = jcol - 1;
-		int quota;
-
-		/* Compute the quota */
-		if (drop_rule & DROP_PROWS)
-		    quota = gamma * Astore->nnz / m * (m - first) / m
-			    * (last - first + 1);
-		else if (drop_rule & DROP_COLUMN) {
-		    int i;
-		    quota = 0;
-		    for (i = first; i <= last; i++)
-			quota += xa_end[i] - xa_begin[i];
-		    quota = gamma * quota * (m - first) / m;
-		} else if (drop_rule & DROP_AREA)
-		    quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
-			    - nnzLj;
-		else
-		    quota = m * n;
-		fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / min_mn);
-
-		/* Drop small rows */
-		i = ilu_sdrop_row(options, first, last, tol_L, quota, &nnzLj,
-				  &fill_tol, &Glu, tempv, iwork2, 0);
-		/* Reset the parameters */
-		if (drop_rule & DROP_DYNAMIC) {
-		    if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
-			     < nnzLj)
-			tol_L = SUPERLU_MIN(1.0, tol_L * 2.0);
-		    else
-			tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5);
-		}
-		if (fill_tol < 0) iinfo -= (int)fill_tol;
-#ifdef DEBUG
-		num_drop_L += i * (last - first + 1);
-#endif
-	    }
-
-	    /* --------------------------------------
-	     * Factorize the relaxed supernode(jcol:kcol)
-	     * -------------------------------------- */
-	    /* Determine the union of the row structure of the snode */
-	    if ( (*info = ilu_ssnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
-					 marker, &Glu)) != 0 )
-		return;
-
-	    nextu    = xusub[jcol];
-	    nextlu   = xlusup[jcol];
-	    jsupno   = supno[jcol];
-	    fsupc    = xsup[jsupno];
-	    new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
-	    nzlumax = Glu.nzlumax;
-	    while ( new_next > nzlumax ) {
-		if ((*info = sLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu)))
-		    return;
-	    }
-
-	    for (icol = jcol; icol <= kcol; icol++) {
-		xusub[icol+1] = nextu;
-
-		amax[0] = 0.0;
-		/* Scatter into SPA dense[*] */
-		for (k = xa_begin[icol]; k < xa_end[icol]; k++) {
-		    register float tmp = fabs(a[k]);
-		    if (tmp > amax[0]) amax[0] = tmp;
-		    dense[asub[k]] = a[k];
-		}
-		nnzAj += xa_end[icol] - xa_begin[icol];
-		if (amax[0] == 0.0) {
-		    amax[0] = fill_ini;
-#if ( PRNTlevel >= 1)
-		    printf("Column %d is entirely zero!\n", icol);
-		    fflush(stdout);
-#endif
-		}
-
-		/* Numeric update within the snode */
-		ssnode_bmod(icol, jsupno, fsupc, dense, tempv, &Glu, stat);
-
-		if (usepr) pivrow = iperm_r[icol];
-		fill_tol = pow(fill_ini, 1.0 - (double)icol / (double)min_mn);
-		if ( (*info = ilu_spivotL(icol, diag_pivot_thresh, &usepr,
-					  perm_r, iperm_c[icol], swap, iswap,
-					  marker_relax, &pivrow,
-                                          amax[0] * fill_tol, milu, zero,
-                                          &Glu, stat)) ) {
-		    iinfo++;
-		    marker[pivrow] = kcol;
-		}
-
-	    }
-
-	    jcol = kcol + 1;
-
-	} else { /* Work on one panel of panel_size columns */
-
-	    /* Adjust panel_size so that a panel won't overlap with the next
-	     * relaxed snode.
-	     */
-	    panel_size = w_def;
-	    for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++)
-		if ( relax_end[k] != EMPTY ) {
-		    panel_size = k - jcol;
-		    break;
-		}
-	    if ( k == min_mn ) panel_size = min_mn - jcol;
-	    panel_histo[panel_size]++;
-
-	    /* symbolic factor on a panel of columns */
-	    ilu_spanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
-                          dense, amax, panel_lsub, segrep, repfnz,
-                          marker, parent, xplore, &Glu);
-
-	    /* numeric sup-panel updates in topological order */
-	    spanel_bmod(m, panel_size, jcol, nseg1, dense,
-			tempv, segrep, repfnz, &Glu, stat);
-
-	    /* Sparse LU within the panel, and below panel diagonal */
-	    for (jj = jcol; jj < jcol + panel_size; jj++) {
-
-		k = (jj - jcol) * m; /* column index for w-wide arrays */
-
-		nseg = nseg1;	/* Begin after all the panel segments */
-
-		nnzAj += xa_end[jj] - xa_begin[jj];
-
-		if ((*info = ilu_scolumn_dfs(m, jj, perm_r, &nseg,
-					     &panel_lsub[k], segrep, &repfnz[k],
-					     marker, parent, xplore, &Glu)))
-		    return;
-
-		/* Numeric updates */
-		if ((*info = scolumn_bmod(jj, (nseg - nseg1), &dense[k],
-					  tempv, &segrep[nseg1], &repfnz[k],
-					  jcol, &Glu, stat)) != 0) return;
-
-		/* Make a fill-in position if the column is entirely zero */
-		if (xlsub[jj + 1] == xlsub[jj]) {
-		    register int i, row;
-		    int nextl;
-		    int nzlmax = Glu.nzlmax;
-		    int *lsub = Glu.lsub;
-		    int *marker2 = marker + 2 * m;
-
-		    /* Allocate memory */
-		    nextl = xlsub[jj] + 1;
-		    if (nextl >= nzlmax) {
-			int error = sLUMemXpand(jj, nextl, LSUB, &nzlmax, &Glu);
-			if (error) { *info = error; return; }
-			lsub = Glu.lsub;
-		    }
-		    xlsub[jj + 1]++;
-		    assert(xlusup[jj]==xlusup[jj+1]);
-		    xlusup[jj + 1]++;
-		    Glu.lusup[xlusup[jj]] = zero;
-
-		    /* Choose a row index (pivrow) for fill-in */
-		    for (i = jj; i < n; i++)
-			if (marker_relax[swap[i]] <= jj) break;
-		    row = swap[i];
-		    marker2[row] = jj;
-		    lsub[xlsub[jj]] = row;
-#ifdef DEBUG
-		    printf("Fill col %d.\n", jj);
-		    fflush(stdout);
-#endif
-		}
-
-		/* Computer the quota */
-		if (drop_rule & DROP_PROWS)
-		    quota = gamma * Astore->nnz / m * jj / m;
-		else if (drop_rule & DROP_COLUMN)
-		    quota = gamma * (xa_end[jj] - xa_begin[jj]) *
-			    (jj + 1) / m;
-		else if (drop_rule & DROP_AREA)
-		    quota = gamma * 0.9 * nnzAj * 0.5 - nnzUj;
-		else
-		    quota = m;
-
-		/* Copy the U-segments to ucol[*] and drop small entries */
-		if ((*info = ilu_scopy_to_ucol(jj, nseg, segrep, &repfnz[k],
-					       perm_r, &dense[k], drop_rule,
-					       milu, amax[jj - jcol] * tol_U,
-					       quota, &drop_sum, &nnzUj, &Glu,
-					       iwork2)) != 0)
-		    return;
-
-		/* Reset the dropping threshold if required */
-		if (drop_rule & DROP_DYNAMIC) {
-		    if (gamma * 0.9 * nnzAj * 0.5 < nnzLj)
-			tol_U = SUPERLU_MIN(1.0, tol_U * 2.0);
-		    else
-			tol_U = SUPERLU_MAX(drop_tol, tol_U * 0.5);
-		}
-
-                drop_sum *= MILU_ALPHA;
-		if (usepr) pivrow = iperm_r[jj];
-		fill_tol = pow(fill_ini, 1.0 - (double)jj / (double)min_mn);
-		if ( (*info = ilu_spivotL(jj, diag_pivot_thresh, &usepr, perm_r,
-					  iperm_c[jj], swap, iswap,
-					  marker_relax, &pivrow,
-					  amax[jj - jcol] * fill_tol, milu,
-					  drop_sum, &Glu, stat)) ) {
-		    iinfo++;
-		    marker[m + pivrow] = jj;
-		    marker[2 * m + pivrow] = jj;
-		}
-
-		/* Reset repfnz[] for this column */
-		resetrep_col (nseg, segrep, &repfnz[k]);
-
-		/* Start a new supernode, drop the previous one */
-		if (jj > 0 && supno[jj] > supno[jj - 1] && jj < last_drop) {
-		    int first = xsup[supno[jj - 1]];
-		    int last = jj - 1;
-		    int quota;
-
-		    /* Compute the quota */
-		    if (drop_rule & DROP_PROWS)
-			quota = gamma * Astore->nnz / m * (m - first) / m
-				* (last - first + 1);
-		    else if (drop_rule & DROP_COLUMN) {
-			int i;
-			quota = 0;
-			for (i = first; i <= last; i++)
-			    quota += xa_end[i] - xa_begin[i];
-			quota = gamma * quota * (m - first) / m;
-		    } else if (drop_rule & DROP_AREA)
-			quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0)
-				/ m) - nnzLj;
-		    else
-			quota = m * n;
-		    fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) /
-			    (double)min_mn);
-
-		    /* Drop small rows */
-		    i = ilu_sdrop_row(options, first, last, tol_L, quota,
-				      &nnzLj, &fill_tol, &Glu, tempv, iwork2,
-				      1);
-
-		    /* Reset the parameters */
-		    if (drop_rule & DROP_DYNAMIC) {
-			if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
-				< nnzLj)
-			    tol_L = SUPERLU_MIN(1.0, tol_L * 2.0);
-			else
-			    tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5);
-		    }
-		    if (fill_tol < 0) iinfo -= (int)fill_tol;
-#ifdef DEBUG
-		    num_drop_L += i * (last - first + 1);
-#endif
-		} /* if start a new supernode */
-
-	    } /* for */
-
-	    jcol += panel_size; /* Move to the next panel */
-
-	} /* else */
-
-    } /* for */
-
-    *info = iinfo;
-
-    if ( m > n ) {
-	k = 0;
-	for (i = 0; i < m; ++i)
-	    if ( perm_r[i] == EMPTY ) {
-		perm_r[i] = n + k;
-		++k;
-	    }
-    }
-
-    ilu_countnz(min_mn, &nnzL, &nnzU, &Glu);
-    fixupL(min_mn, perm_r, &Glu);
-
-    sLUWorkFree(iwork, swork, &Glu); /* Free work space and compress storage */
-
-    if ( fact == SamePattern_SameRowPerm ) {
-	/* L and U structures may have changed due to possibly different
-	   pivoting, even though the storage is available.
-	   There could also be memory expansions, so the array locations
-	   may have changed, */
-	((SCformat *)L->Store)->nnz = nnzL;
-	((SCformat *)L->Store)->nsuper = Glu.supno[n];
-	((SCformat *)L->Store)->nzval = Glu.lusup;
-	((SCformat *)L->Store)->nzval_colptr = Glu.xlusup;
-	((SCformat *)L->Store)->rowind = Glu.lsub;
-	((SCformat *)L->Store)->rowind_colptr = Glu.xlsub;
-	((NCformat *)U->Store)->nnz = nnzU;
-	((NCformat *)U->Store)->nzval = Glu.ucol;
-	((NCformat *)U->Store)->rowind = Glu.usub;
-	((NCformat *)U->Store)->colptr = Glu.xusub;
-    } else {
-	sCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup,
-				 Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno,
-				 Glu.xsup, SLU_SC, SLU_S, SLU_TRLU);
-	sCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol,
-			       Glu.usub, Glu.xusub, SLU_NC, SLU_S, SLU_TRU);
-    }
-
-    ops[FACT] += ops[TRSV] + ops[GEMV];
-    stat->expansions = --(Glu.num_expansions);
-
-    if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
-    SUPERLU_FREE (iperm_c);
-    SUPERLU_FREE (relax_end);
-    SUPERLU_FREE (swap);
-    SUPERLU_FREE (iswap);
-    SUPERLU_FREE (relax_fsupc);
-    SUPERLU_FREE (amax);
-    if ( iwork2 ) SUPERLU_FREE (iwork2);
-
-}
diff --git a/thirdparty/superlu/SuperLU_4.1/SRC/slacon.c.bak b/thirdparty/superlu/SuperLU_4.1/SRC/slacon.c.bak
deleted file mode 100644
index 4e02fdc..0000000
--- a/thirdparty/superlu/SuperLU_4.1/SRC/slacon.c.bak
+++ /dev/null
@@ -1,236 +0,0 @@
-
-/*! @file slacon.c
- * \brief Estimates the 1-norm
- *
- * <pre>
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- * </pre>
- */
-#include <math.h>
-#include "slu_Cnames.h"
-
-/*! \brief
- *
- * <pre>
- *   Purpose   
- *   =======   
- *
- *   SLACON estimates the 1-norm of a square matrix A.   
- *   Reverse communication is used for evaluating matrix-vector products. 
- * 
- *
- *   Arguments   
- *   =========   
- *
- *   N      (input) INT
- *          The order of the matrix.  N >= 1.   
- *
- *   V      (workspace) FLOAT PRECISION array, dimension (N)   
- *          On the final return, V = A*W,  where  EST = norm(V)/norm(W)   
- *          (W is not returned).   
- *
- *   X      (input/output) FLOAT PRECISION array, dimension (N)   
- *          On an intermediate return, X should be overwritten by   
- *                A * X,   if KASE=1,   
- *                A' * X,  if KASE=2,
- *         and SLACON must be re-called with all the other parameters   
- *          unchanged.   
- *
- *   ISGN   (workspace) INT array, dimension (N)
- *
- *   EST    (output) FLOAT PRECISION   
- *          An estimate (a lower bound) for norm(A).   
- *
- *   KASE   (input/output) INT
- *          On the initial call to SLACON, KASE should be 0.   
- *          On an intermediate return, KASE will be 1 or 2, indicating   
- *          whether X should be overwritten by A * X  or A' * X.   
- *          On the final return from SLACON, KASE will again be 0.   
- *
- *   Further Details   
- *   ======= =======   
- *
- *   Contributed by Nick Higham, University of Manchester.   
- *   Originally named CONEST, dated March 16, 1988.   
- *
- *   Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of 
- *   a real or complex matrix, with applications to condition estimation", 
- *   ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988.   
- *   ===================================================================== 
- * </pre>
- */
-
-int
-slacon_(int *n, float *v, float *x, int *isgn, float *est, int *kase)
-
-{
-
-
-    /* Table of constant values */
-    int c__1 = 1;
-    float      zero = 0.0;
-    float      one = 1.0;
-    
-    /* Local variables */
-    static int iter;
-    static int jump, jlast;
-    static float altsgn, estold;
-    static int i, j;
-    float temp;
-#ifdef _CRAY
-    extern int ISAMAX(int *, float *, int *);
-    extern float SASUM(int *, float *, int *);
-    extern int SCOPY(int *, float *, int *, float *, int *);
-#else
-    extern int isamax_(int *, float *, int *);
-    extern float sasum_(int *, float *, int *);
-    extern int scopy_(int *, float *, int *, float *, int *);
-#endif
-#define d_sign(a, b) (b >= 0 ? fabs(a) : -fabs(a))    /* Copy sign */
-#define i_dnnt(a) \
-	( a>=0 ? floor(a+.5) : -floor(.5-a) ) /* Round to nearest integer */
-
-    if ( *kase == 0 ) {
-	for (i = 0; i < *n; ++i) {
-	    x[i] = 1. / (float) (*n);
-	}
-	*kase = 1;
-	jump = 1;
-	return 0;
-    }
-
-    switch (jump) {
-	case 1:  goto L20;
-	case 2:  goto L40;
-	case 3:  goto L70;
-	case 4:  goto L110;
-	case 5:  goto L140;
-    }
-
-    /*     ................ ENTRY   (JUMP = 1)   
-	   FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY A*X. */
-  L20:
-    if (*n == 1) {
-	v[0] = x[0];
-	*est = fabs(v[0]);
-	/*        ... QUIT */
-	goto L150;
-    }
-#ifdef _CRAY
-    *est = SASUM(n, x, &c__1);
-#else
-    *est = sasum_(n, x, &c__1);
-#endif
-
-    for (i = 0; i < *n; ++i) {
-	x[i] = d_sign(one, x[i]);
-	isgn[i] = i_dnnt(x[i]);
-    }
-    *kase = 2;
-    jump = 2;
-    return 0;
-
-    /*     ................ ENTRY   (JUMP = 2)   
-	   FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. */
-L40:
-#ifdef _CRAY
-    j = ISAMAX(n, &x[0], &c__1);
-#else
-    j = isamax_(n, &x[0], &c__1);
-#endif
-    --j;
-    iter = 2;
-
-    /*     MAIN LOOP - ITERATIONS 2,3,...,ITMAX. */
-L50:
-    for (i = 0; i < *n; ++i) x[i] = zero;
-    x[j] = one;
-    *kase = 1;
-    jump = 3;
-    return 0;
-
-    /*     ................ ENTRY   (JUMP = 3)   
-	   X HAS BEEN OVERWRITTEN BY A*X. */
-L70:
-#ifdef _CRAY
-    SCOPY(n, x, &c__1, v, &c__1);
-#else
-    scopy_(n, x, &c__1, v, &c__1);
-#endif
-    estold = *est;
-#ifdef _CRAY
-    *est = SASUM(n, v, &c__1);
-#else
-    *est = sasum_(n, v, &c__1);
-#endif
-
-    for (i = 0; i < *n; ++i)
-	if (i_dnnt(d_sign(one, x[i])) != isgn[i])
-	    goto L90;
-
-    /*     REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED. */
-    goto L120;
-
-L90:
-    /*     TEST FOR CYCLING. */
-    if (*est <= estold) goto L120;
-
-    for (i = 0; i < *n; ++i) {
-	x[i] = d_sign(one, x[i]);
-	isgn[i] = i_dnnt(x[i]);
-    }
-    *kase = 2;
-    jump = 4;
-    return 0;
-
-    /*     ................ ENTRY   (JUMP = 4)   
-	   X HAS BEEN OVERWRITTEN BY TRANDPOSE(A)*X. */
-L110:
-    jlast = j;
-#ifdef _CRAY
-    j = ISAMAX(n, &x[0], &c__1);
-#else
-    j = isamax_(n, &x[0], &c__1);
-#endif
-    --j;
-    if (x[jlast] != fabs(x[j]) && iter < 5) {
-	++iter;
-	goto L50;
-    }
-
-    /*     ITERATION COMPLETE.  FINAL STAGE. */
-L120:
-    altsgn = 1.;
-    for (i = 1; i <= *n; ++i) {
-	x[i-1] = altsgn * ((float)(i - 1) / (float)(*n - 1) + 1.);
-	altsgn = -altsgn;
-    }
-    *kase = 1;
-    jump = 5;
-    return 0;
-    
-    /*     ................ ENTRY   (JUMP = 5)   
-	   X HAS BEEN OVERWRITTEN BY A*X. */
-L140:
-#ifdef _CRAY
-    temp = SASUM(n, x, &c__1) / (float)(*n * 3) * 2.;
-#else
-    temp = sasum_(n, x, &c__1) / (float)(*n * 3) * 2.;
-#endif
-    if (temp > *est) {
-#ifdef _CRAY
-	SCOPY(n, &x[0], &c__1, &v[0], &c__1);
-#else
-	scopy_(n, &x[0], &c__1, &v[0], &c__1);
-#endif
-	*est = temp;
-    }
-
-L150:
-    *kase = 0;
-    return 0;
-
-} /* slacon_ */
diff --git a/thirdparty/superlu/SuperLU_4.1/SRC/zgsitrf.c.bak b/thirdparty/superlu/SuperLU_4.1/SRC/zgsitrf.c.bak
deleted file mode 100644
index a763c98..0000000
--- a/thirdparty/superlu/SuperLU_4.1/SRC/zgsitrf.c.bak
+++ /dev/null
@@ -1,629 +0,0 @@
-
-/*! @file zgsitf.c
- * \brief Computes an ILU factorization of a general sparse matrix
- *
- * <pre>
- * -- SuperLU routine (version 4.0) --
- * Lawrence Berkeley National Laboratory.
- * June 30, 2009
- * </pre>
- */
-
-#include "slu_zdefs.h"
-
-#ifdef DEBUG
-int num_drop_L;
-#endif
-
-/*! \brief
- *
- * <pre>
- * Purpose
- * =======
- *
- * ZGSITRF computes an ILU factorization of a general sparse m-by-n
- * matrix A using partial pivoting with row interchanges.
- * The factorization has the form
- *     Pr * A = L * U
- * where Pr is a row permutation matrix, L is lower triangular with unit
- * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper
- * triangular (upper trapezoidal if A->nrow < A->ncol).
- *
- * See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * options (input) superlu_options_t*
- *	   The structure defines the input parameters to control
- *	   how the ILU decomposition will be performed.
- *
- * A	    (input) SuperMatrix*
- *	    Original matrix A, permuted by columns, of dimension
- *	    (A->nrow, A->ncol). The type of A can be:
- *	    Stype = SLU_NCP; Dtype = SLU_Z; Mtype = SLU_GE.
- *
- * relax    (input) int
- *	    To control degree of relaxing supernodes. If the number
- *	    of nodes (columns) in a subtree of the elimination tree is less
- *	    than relax, this subtree is considered as one supernode,
- *	    regardless of the row structures of those columns.
- *
- * panel_size (input) int
- *	    A panel consists of at most panel_size consecutive columns.
- *
- * etree    (input) int*, dimension (A->ncol)
- *	    Elimination tree of A'*A.
- *	    Note: etree is a vector of parent pointers for a forest whose
- *	    vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
- *	    On input, the columns of A should be permuted so that the
- *	    etree is in a certain postorder.
- *
- * work     (input/output) void*, size (lwork) (in bytes)
- *	    User-supplied work space and space for the output data structures.
- *	    Not referenced if lwork = 0;
- *
- * lwork   (input) int
- *	   Specifies the size of work array in bytes.
- *	   = 0:  allocate space internally by system malloc;
- *	   > 0:  use user-supplied work array of length lwork in bytes,
- *		 returns error if space runs out.
- *	   = -1: the routine guesses the amount of space needed without
- *		 performing the factorization, and returns it in
- *		 *info; no other side effects.
- *
- * perm_c   (input) int*, dimension (A->ncol)
- *	    Column permutation vector, which defines the
- *	    permutation matrix Pc; perm_c[i] = j means column i of A is
- *	    in position j in A*Pc.
- *	    When searching for diagonal, perm_c[*] is applied to the
- *	    row subscripts of A, so that diagonal threshold pivoting
- *	    can find the diagonal of A, rather than that of A*Pc.
- *
- * perm_r   (input/output) int*, dimension (A->nrow)
- *	    Row permutation vector which defines the permutation matrix Pr,
- *	    perm_r[i] = j means row i of A is in position j in Pr*A.
- *	    If options->Fact = SamePattern_SameRowPerm, the pivoting routine
- *	       will try to use the input perm_r, unless a certain threshold
- *	       criterion is violated. In that case, perm_r is overwritten by
- *	       a new permutation determined by partial pivoting or diagonal
- *	       threshold pivoting.
- *	    Otherwise, perm_r is output argument;
- *
- * L	    (output) SuperMatrix*
- *	    The factor L from the factorization Pr*A=L*U; use compressed row
- *	    subscripts storage for supernodes, i.e., L has type:
- *	    Stype = SLU_SC, Dtype = SLU_Z, Mtype = SLU_TRLU.
- *
- * U	    (output) SuperMatrix*
- *	    The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
- *	    storage scheme, i.e., U has types: Stype = SLU_NC,
- *	    Dtype = SLU_Z, Mtype = SLU_TRU.
- *
- * stat     (output) SuperLUStat_t*
- *	    Record the statistics on runtime and floating-point operation count.
- *	    See slu_util.h for the definition of 'SuperLUStat_t'.
- *
- * info     (output) int*
- *	    = 0: successful exit
- *	    < 0: if info = -i, the i-th argument had an illegal value
- *	    > 0: if info = i, and i is
- *	       <= A->ncol: number of zero pivots. They are replaced by small
- *		  entries according to options->ILU_FillTol.
- *	       > A->ncol: number of bytes allocated when memory allocation
- *		  failure occurred, plus A->ncol. If lwork = -1, it is
- *		  the estimated amount of space needed, plus A->ncol.
- *
- * ======================================================================
- *
- * Local Working Arrays:
- * ======================
- *   m = number of rows in the matrix
- *   n = number of columns in the matrix
- *
- *   marker[0:3*m-1]: marker[i] = j means that node i has been
- *	reached when working on column j.
- *	Storage: relative to original row subscripts
- *	NOTE: There are 4 of them:
- *	      marker/marker1 are used for panel dfs, see (ilu_)dpanel_dfs.c;
- *	      marker2 is used for inner-factorization, see (ilu)_dcolumn_dfs.c;
- *	      marker_relax(has its own space) is used for relaxed supernodes.
- *
- *   parent[0:m-1]: parent vector used during dfs
- *	Storage: relative to new row subscripts
- *
- *   xplore[0:m-1]: xplore[i] gives the location of the next (dfs)
- *	unexplored neighbor of i in lsub[*]
- *
- *   segrep[0:nseg-1]: contains the list of supernodal representatives
- *	in topological order of the dfs. A supernode representative is the
- *	last column of a supernode.
- *	The maximum size of segrep[] is n.
- *
- *   repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a
- *	supernodal representative r, repfnz[r] is the location of the first
- *	nonzero in this segment.  It is also used during the dfs: repfnz[r]>0
- *	indicates the supernode r has been explored.
- *	NOTE: There are W of them, each used for one column of a panel.
- *
- *   panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below
- *	the panel diagonal. These are filled in during dpanel_dfs(), and are
- *	used later in the inner LU factorization within the panel.
- *	panel_lsub[]/dense[] pair forms the SPA data structure.
- *	NOTE: There are W of them.
- *
- *   dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
- *		   NOTE: there are W of them.
- *
- *   tempv[0:*]: real temporary used for dense numeric kernels;
- *	The size of this array is defined by NUM_TEMPV() in slu_util.h.
- *	It is also used by the dropping routine ilu_ddrop_row().
- * </pre>
- */
-
-void
-zgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
-	int *etree, void *work, int lwork, int *perm_c, int *perm_r,
-	SuperMatrix *L, SuperMatrix *U, SuperLUStat_t *stat, int *info)
-{
-    /* Local working arrays */
-    NCPformat *Astore;
-    int       *iperm_r = NULL; /* inverse of perm_r; used when
-				  options->Fact == SamePattern_SameRowPerm */
-    int       *iperm_c; /* inverse of perm_c */
-    int       *swap, *iswap; /* swap is used to store the row permutation
-				during the factorization. Initially, it is set
-				to iperm_c (row indeces of Pc*A*Pc').
-				iswap is the inverse of swap. After the
-				factorization, it is equal to perm_r. */
-    int       *iwork;
-    doublecomplex   *zwork;
-    int       *segrep, *repfnz, *parent, *xplore;
-    int       *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
-    int       *marker, *marker_relax;
-    doublecomplex    *dense, *tempv;
-    double *dtempv;
-    int       *relax_end, *relax_fsupc;
-    doublecomplex    *a;
-    int       *asub;
-    int       *xa_begin, *xa_end;
-    int       *xsup, *supno;
-    int       *xlsub, *xlusup, *xusub;
-    int       nzlumax;
-    double    *amax; 
-    doublecomplex    drop_sum;
-    static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */
-    int       *iwork2;	   /* used by the second dropping rule */
-
-    /* Local scalars */
-    fact_t    fact = options->Fact;
-    double    diag_pivot_thresh = options->DiagPivotThresh;
-    double    drop_tol = options->ILU_DropTol; /* tau */
-    double    fill_ini = options->ILU_FillTol; /* tau^hat */
-    double    gamma = options->ILU_FillFactor;
-    int       drop_rule = options->ILU_DropRule;
-    milu_t    milu = options->ILU_MILU;
-    double    fill_tol;
-    int       pivrow;	/* pivotal row number in the original matrix A */
-    int       nseg1;	/* no of segments in U-column above panel row jcol */
-    int       nseg;	/* no of segments in each U-column */
-    register int jcol;
-    register int kcol;	/* end column of a relaxed snode */
-    register int icol;
-    register int i, k, jj, new_next, iinfo;
-    int       m, n, min_mn, jsupno, fsupc, nextlu, nextu;
-    int       w_def;	/* upper bound on panel width */
-    int       usepr, iperm_r_allocated = 0;
-    int       nnzL, nnzU;
-    int       *panel_histo = stat->panel_histo;
-    flops_t   *ops = stat->ops;
-
-    int       last_drop;/* the last column which the dropping rules applied */
-    int       quota;
-    int       nnzAj;	/* number of nonzeros in A(:,1:j) */
-    int       nnzLj, nnzUj;
-    double    tol_L = drop_tol, tol_U = drop_tol;
-    doublecomplex zero = {0.0, 0.0};
-
-    /* Executable */	   
-    iinfo    = 0;
-    m	     = A->nrow;
-    n	     = A->ncol;
-    min_mn   = SUPERLU_MIN(m, n);
-    Astore   = A->Store;
-    a	     = Astore->nzval;
-    asub     = Astore->rowind;
-    xa_begin = Astore->colbeg;
-    xa_end   = Astore->colend;
-
-    /* Allocate storage common to the factor routines */
-    *info = zLUMemInit(fact, work, lwork, m, n, Astore->nnz, panel_size,
-		       gamma, L, U, &Glu, &iwork, &zwork);
-    if ( *info ) return;
-
-    xsup    = Glu.xsup;
-    supno   = Glu.supno;
-    xlsub   = Glu.xlsub;
-    xlusup  = Glu.xlusup;
-    xusub   = Glu.xusub;
-
-    SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
-	     &repfnz, &panel_lsub, &marker_relax, &marker);
-    zSetRWork(m, panel_size, zwork, &dense, &tempv);
-
-    usepr = (fact == SamePattern_SameRowPerm);
-    if ( usepr ) {
-	/* Compute the inverse of perm_r */
-	iperm_r = (int *) intMalloc(m);
-	for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
-	iperm_r_allocated = 1;
-    }
-
-    iperm_c = (int *) intMalloc(n);
-    for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
-    swap = (int *)intMalloc(n);
-    for (k = 0; k < n; k++) swap[k] = iperm_c[k];
-    iswap = (int *)intMalloc(n);
-    for (k = 0; k < n; k++) iswap[k] = perm_c[k];
-    amax = (double *) doubleMalloc(panel_size);
-    if (drop_rule & DROP_SECONDARY)
-	iwork2 = (int *)intMalloc(n);
-    else
-	iwork2 = NULL;
-
-    nnzAj = 0;
-    nnzLj = 0;
-    nnzUj = 0;
-    last_drop = SUPERLU_MAX(min_mn - 2 * sp_ienv(7), (int)(min_mn * 0.95));
-
-    /* Identify relaxed snodes */
-    relax_end = (int *) intMalloc(n);
-    relax_fsupc = (int *) intMalloc(n);
-    if ( options->SymmetricMode == YES )
-	ilu_heap_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc);
-    else
-	ilu_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc);
-
-    ifill (perm_r, m, EMPTY);
-    ifill (marker, m * NO_MARKER, EMPTY);
-    supno[0] = -1;
-    xsup[0]  = xlsub[0] = xusub[0] = xlusup[0] = 0;
-    w_def    = panel_size;
-
-    /* Mark the rows used by relaxed supernodes */
-    ifill (marker_relax, m, EMPTY);
-    i = mark_relax(m, relax_end, relax_fsupc, xa_begin, xa_end,
-	         asub, marker_relax);
-#if ( PRNTlevel >= 1)
-    printf("%d relaxed supernodes.\n", i);
-#endif
-
-    /*
-     * Work on one "panel" at a time. A panel is one of the following:
-     *	   (a) a relaxed supernode at the bottom of the etree, or
-     *	   (b) panel_size contiguous columns, defined by the user
-     */
-    for (jcol = 0; jcol < min_mn; ) {
-
-	if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
-	    kcol = relax_end[jcol];	  /* end of the relaxed snode */
-	    panel_histo[kcol-jcol+1]++;
-
-	    /* Drop small rows in the previous supernode. */
-	    if (jcol > 0 && jcol < last_drop) {
-		int first = xsup[supno[jcol - 1]];
-		int last = jcol - 1;
-		int quota;
-
-		/* Compute the quota */
-		if (drop_rule & DROP_PROWS)
-		    quota = gamma * Astore->nnz / m * (m - first) / m
-			    * (last - first + 1);
-		else if (drop_rule & DROP_COLUMN) {
-		    int i;
-		    quota = 0;
-		    for (i = first; i <= last; i++)
-			quota += xa_end[i] - xa_begin[i];
-		    quota = gamma * quota * (m - first) / m;
-		} else if (drop_rule & DROP_AREA)
-		    quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
-			    - nnzLj;
-		else
-		    quota = m * n;
-		fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / min_mn);
-
-		/* Drop small rows */
-                dtempv = (double *) tempv;
-		i = ilu_zdrop_row(options, first, last, tol_L, quota, &nnzLj,
-				  &fill_tol, &Glu, dtempv, iwork2, 0);
-		/* Reset the parameters */
-		if (drop_rule & DROP_DYNAMIC) {
-		    if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
-			     < nnzLj)
-			tol_L = SUPERLU_MIN(1.0, tol_L * 2.0);
-		    else
-			tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5);
-		}
-		if (fill_tol < 0) iinfo -= (int)fill_tol;
-#ifdef DEBUG
-		num_drop_L += i * (last - first + 1);
-#endif
-	    }
-
-	    /* --------------------------------------
-	     * Factorize the relaxed supernode(jcol:kcol)
-	     * -------------------------------------- */
-	    /* Determine the union of the row structure of the snode */
-	    if ( (*info = ilu_zsnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
-					 marker, &Glu)) != 0 )
-		return;
-
-	    nextu    = xusub[jcol];
-	    nextlu   = xlusup[jcol];
-	    jsupno   = supno[jcol];
-	    fsupc    = xsup[jsupno];
-	    new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
-	    nzlumax = Glu.nzlumax;
-	    while ( new_next > nzlumax ) {
-		if ((*info = zLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu)))
-		    return;
-	    }
-
-	    for (icol = jcol; icol <= kcol; icol++) {
-		xusub[icol+1] = nextu;
-
-		amax[0] = 0.0;
-		/* Scatter into SPA dense[*] */
-		for (k = xa_begin[icol]; k < xa_end[icol]; k++) {
-                    register double tmp = z_abs1 (&a[k]);
-		    if (tmp > amax[0]) amax[0] = tmp;
-		    dense[asub[k]] = a[k];
-		}
-		nnzAj += xa_end[icol] - xa_begin[icol];
-		if (amax[0] == 0.0) {
-		    amax[0] = fill_ini;
-#if ( PRNTlevel >= 1)
-		    printf("Column %d is entirely zero!\n", icol);
-		    fflush(stdout);
-#endif
-		}
-
-		/* Numeric update within the snode */
-		zsnode_bmod(icol, jsupno, fsupc, dense, tempv, &Glu, stat);
-
-		if (usepr) pivrow = iperm_r[icol];
-		fill_tol = pow(fill_ini, 1.0 - (double)icol / (double)min_mn);
-		if ( (*info = ilu_zpivotL(icol, diag_pivot_thresh, &usepr,
-					  perm_r, iperm_c[icol], swap, iswap,
-					  marker_relax, &pivrow,
-                                          amax[0] * fill_tol, milu, zero,
-                                          &Glu, stat)) ) {
-		    iinfo++;
-		    marker[pivrow] = kcol;
-		}
-
-	    }
-
-	    jcol = kcol + 1;
-
-	} else { /* Work on one panel of panel_size columns */
-
-	    /* Adjust panel_size so that a panel won't overlap with the next
-	     * relaxed snode.
-	     */
-	    panel_size = w_def;
-	    for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++)
-		if ( relax_end[k] != EMPTY ) {
-		    panel_size = k - jcol;
-		    break;
-		}
-	    if ( k == min_mn ) panel_size = min_mn - jcol;
-	    panel_histo[panel_size]++;
-
-	    /* symbolic factor on a panel of columns */
-	    ilu_zpanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
-                          dense, amax, panel_lsub, segrep, repfnz,
-                          marker, parent, xplore, &Glu);
-
-	    /* numeric sup-panel updates in topological order */
-	    zpanel_bmod(m, panel_size, jcol, nseg1, dense,
-			tempv, segrep, repfnz, &Glu, stat);
-
-	    /* Sparse LU within the panel, and below panel diagonal */
-	    for (jj = jcol; jj < jcol + panel_size; jj++) {
-
-		k = (jj - jcol) * m; /* column index for w-wide arrays */
-
-		nseg = nseg1;	/* Begin after all the panel segments */
-
-		nnzAj += xa_end[jj] - xa_begin[jj];
-
-		if ((*info = ilu_zcolumn_dfs(m, jj, perm_r, &nseg,
-					     &panel_lsub[k], segrep, &repfnz[k],
-					     marker, parent, xplore, &Glu)))
-		    return;
-
-		/* Numeric updates */
-		if ((*info = zcolumn_bmod(jj, (nseg - nseg1), &dense[k],
-					  tempv, &segrep[nseg1], &repfnz[k],
-					  jcol, &Glu, stat)) != 0) return;
-
-		/* Make a fill-in position if the column is entirely zero */
-		if (xlsub[jj + 1] == xlsub[jj]) {
-		    register int i, row;
-		    int nextl;
-		    int nzlmax = Glu.nzlmax;
-		    int *lsub = Glu.lsub;
-		    int *marker2 = marker + 2 * m;
-
-		    /* Allocate memory */
-		    nextl = xlsub[jj] + 1;
-		    if (nextl >= nzlmax) {
-			int error = zLUMemXpand(jj, nextl, LSUB, &nzlmax, &Glu);
-			if (error) { *info = error; return; }
-			lsub = Glu.lsub;
-		    }
-		    xlsub[jj + 1]++;
-		    assert(xlusup[jj]==xlusup[jj+1]);
-		    xlusup[jj + 1]++;
-		    Glu.lusup[xlusup[jj]] = zero;
-
-		    /* Choose a row index (pivrow) for fill-in */
-		    for (i = jj; i < n; i++)
-			if (marker_relax[swap[i]] <= jj) break;
-		    row = swap[i];
-		    marker2[row] = jj;
-		    lsub[xlsub[jj]] = row;
-#ifdef DEBUG
-		    printf("Fill col %d.\n", jj);
-		    fflush(stdout);
-#endif
-		}
-
-		/* Computer the quota */
-		if (drop_rule & DROP_PROWS)
-		    quota = gamma * Astore->nnz / m * jj / m;
-		else if (drop_rule & DROP_COLUMN)
-		    quota = gamma * (xa_end[jj] - xa_begin[jj]) *
-			    (jj + 1) / m;
-		else if (drop_rule & DROP_AREA)
-		    quota = gamma * 0.9 * nnzAj * 0.5 - nnzUj;
-		else
-		    quota = m;
-
-		/* Copy the U-segments to ucol[*] and drop small entries */
-		if ((*info = ilu_zcopy_to_ucol(jj, nseg, segrep, &repfnz[k],
-					       perm_r, &dense[k], drop_rule,
-					       milu, amax[jj - jcol] * tol_U,
-					       quota, &drop_sum, &nnzUj, &Glu,
-					       iwork2)) != 0)
-		    return;
-
-		/* Reset the dropping threshold if required */
-		if (drop_rule & DROP_DYNAMIC) {
-		    if (gamma * 0.9 * nnzAj * 0.5 < nnzLj)
-			tol_U = SUPERLU_MIN(1.0, tol_U * 2.0);
-		    else
-			tol_U = SUPERLU_MAX(drop_tol, tol_U * 0.5);
-		}
-
-                zd_mult(&drop_sum, &drop_sum, MILU_ALPHA);
-		if (usepr) pivrow = iperm_r[jj];
-		fill_tol = pow(fill_ini, 1.0 - (double)jj / (double)min_mn);
-		if ( (*info = ilu_zpivotL(jj, diag_pivot_thresh, &usepr, perm_r,
-					  iperm_c[jj], swap, iswap,
-					  marker_relax, &pivrow,
-					  amax[jj - jcol] * fill_tol, milu,
-					  drop_sum, &Glu, stat)) ) {
-		    iinfo++;
-		    marker[m + pivrow] = jj;
-		    marker[2 * m + pivrow] = jj;
-		}
-
-		/* Reset repfnz[] for this column */
-		resetrep_col (nseg, segrep, &repfnz[k]);
-
-		/* Start a new supernode, drop the previous one */
-		if (jj > 0 && supno[jj] > supno[jj - 1] && jj < last_drop) {
-		    int first = xsup[supno[jj - 1]];
-		    int last = jj - 1;
-		    int quota;
-
-		    /* Compute the quota */
-		    if (drop_rule & DROP_PROWS)
-			quota = gamma * Astore->nnz / m * (m - first) / m
-				* (last - first + 1);
-		    else if (drop_rule & DROP_COLUMN) {
-			int i;
-			quota = 0;
-			for (i = first; i <= last; i++)
-			    quota += xa_end[i] - xa_begin[i];
-			quota = gamma * quota * (m - first) / m;
-		    } else if (drop_rule & DROP_AREA)
-			quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0)
-				/ m) - nnzLj;
-		    else
-			quota = m * n;
-		    fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) /
-			    (double)min_mn);
-
-		    /* Drop small rows */
-                    dtempv = (double *) tempv;
-		    i = ilu_zdrop_row(options, first, last, tol_L, quota,
-				      &nnzLj, &fill_tol, &Glu, dtempv, iwork2,
-				      1);
-
-		    /* Reset the parameters */
-		    if (drop_rule & DROP_DYNAMIC) {
-			if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
-				< nnzLj)
-			    tol_L = SUPERLU_MIN(1.0, tol_L * 2.0);
-			else
-			    tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5);
-		    }
-		    if (fill_tol < 0) iinfo -= (int)fill_tol;
-#ifdef DEBUG
-		    num_drop_L += i * (last - first + 1);
-#endif
-		} /* if start a new supernode */
-
-	    } /* for */
-
-	    jcol += panel_size; /* Move to the next panel */
-
-	} /* else */
-
-    } /* for */
-
-    *info = iinfo;
-
-    if ( m > n ) {
-	k = 0;
-	for (i = 0; i < m; ++i)
-	    if ( perm_r[i] == EMPTY ) {
-		perm_r[i] = n + k;
-		++k;
-	    }
-    }
-
-    ilu_countnz(min_mn, &nnzL, &nnzU, &Glu);
-    fixupL(min_mn, perm_r, &Glu);
-
-    zLUWorkFree(iwork, zwork, &Glu); /* Free work space and compress storage */
-
-    if ( fact == SamePattern_SameRowPerm ) {
-	/* L and U structures may have changed due to possibly different
-	   pivoting, even though the storage is available.
-	   There could also be memory expansions, so the array locations
-	   may have changed, */
-	((SCformat *)L->Store)->nnz = nnzL;
-	((SCformat *)L->Store)->nsuper = Glu.supno[n];
-	((SCformat *)L->Store)->nzval = Glu.lusup;
-	((SCformat *)L->Store)->nzval_colptr = Glu.xlusup;
-	((SCformat *)L->Store)->rowind = Glu.lsub;
-	((SCformat *)L->Store)->rowind_colptr = Glu.xlsub;
-	((NCformat *)U->Store)->nnz = nnzU;
-	((NCformat *)U->Store)->nzval = Glu.ucol;
-	((NCformat *)U->Store)->rowind = Glu.usub;
-	((NCformat *)U->Store)->colptr = Glu.xusub;
-    } else {
-	zCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup,
-				 Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno,
-				 Glu.xsup, SLU_SC, SLU_Z, SLU_TRLU);
-	zCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol,
-			       Glu.usub, Glu.xusub, SLU_NC, SLU_Z, SLU_TRU);
-    }
-
-    ops[FACT] += ops[TRSV] + ops[GEMV];
-    stat->expansions = --(Glu.num_expansions);
-
-    if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
-    SUPERLU_FREE (iperm_c);
-    SUPERLU_FREE (relax_end);
-    SUPERLU_FREE (swap);
-    SUPERLU_FREE (iswap);
-    SUPERLU_FREE (relax_fsupc);
-    SUPERLU_FREE (amax);
-    if ( iwork2 ) SUPERLU_FREE (iwork2);
-
-}
diff --git a/thirdparty/superlu/SuperLU_4.1/SRC/zlacon.c.bak b/thirdparty/superlu/SuperLU_4.1/SRC/zlacon.c.bak
deleted file mode 100644
index b2cd1ed..0000000
--- a/thirdparty/superlu/SuperLU_4.1/SRC/zlacon.c.bak
+++ /dev/null
@@ -1,221 +0,0 @@
-
-/*! @file zlacon.c
- * \brief Estimates the 1-norm
- *
- * <pre>
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- * </pre>
- */
-#include <math.h>
-#include "slu_Cnames.h"
-#include "slu_dcomplex.h"
-
-/*! \brief
- *
- * <pre>
- *   Purpose   
- *   =======   
- *
- *   ZLACON estimates the 1-norm of a square matrix A.   
- *   Reverse communication is used for evaluating matrix-vector products. 
- * 
- *
- *   Arguments   
- *   =========   
- *
- *   N      (input) INT
- *          The order of the matrix.  N >= 1.   
- *
- *   V      (workspace) DOUBLE COMPLEX PRECISION array, dimension (N)   
- *          On the final return, V = A*W,  where  EST = norm(V)/norm(W)   
- *          (W is not returned).   
- *
- *   X      (input/output) DOUBLE COMPLEX PRECISION array, dimension (N)   
- *          On an intermediate return, X should be overwritten by   
- *                A * X,   if KASE=1,   
- *                A' * X,  if KASE=2,
- *          where A' is the conjugate transpose of A,
- *         and ZLACON must be re-called with all the other parameters   
- *          unchanged.   
- *
- *
- *   EST    (output) DOUBLE PRECISION   
- *          An estimate (a lower bound) for norm(A).   
- *
- *   KASE   (input/output) INT
- *          On the initial call to ZLACON, KASE should be 0.   
- *          On an intermediate return, KASE will be 1 or 2, indicating   
- *          whether X should be overwritten by A * X  or A' * X.   
- *          On the final return from ZLACON, KASE will again be 0.   
- *
- *   Further Details   
- *   ======= =======   
- *
- *   Contributed by Nick Higham, University of Manchester.   
- *   Originally named CONEST, dated March 16, 1988.   
- *
- *   Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of 
- *   a real or complex matrix, with applications to condition estimation", 
- *   ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988.   
- *   ===================================================================== 
- * </pre>
- */
-
-int
-zlacon_(int *n, doublecomplex *v, doublecomplex *x, double *est, int *kase)
-
-{
-
-
-    /* Table of constant values */
-    int c__1 = 1;
-    doublecomplex      zero = {0.0, 0.0};
-    doublecomplex      one = {1.0, 0.0};
-
-    /* System generated locals */
-    double d__1;
-    
-    /* Local variables */
-    static int iter;
-    static int jump, jlast;
-    static double altsgn, estold;
-    static int i, j;
-    double temp;
-    double safmin;
-    extern double dlamch_(char *);
-    extern int izmax1_(int *, doublecomplex *, int *);
-    extern double dzsum1_(int *, doublecomplex *, int *);
-
-    safmin = dlamch_("Safe minimum");
-    if ( *kase == 0 ) {
-	for (i = 0; i < *n; ++i) {
-	    x[i].r = 1. / (double) (*n);
-	    x[i].i = 0.;
-	}
-	*kase = 1;
-	jump = 1;
-	return 0;
-    }
-
-    switch (jump) {
-	case 1:  goto L20;
-	case 2:  goto L40;
-	case 3:  goto L70;
-	case 4:  goto L110;
-	case 5:  goto L140;
-    }
-
-    /*     ................ ENTRY   (JUMP = 1)   
-	   FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY A*X. */
-  L20:
-    if (*n == 1) {
-	v[0] = x[0];
-	*est = z_abs(&v[0]);
-	/*        ... QUIT */
-	goto L150;
-    }
-    *est = dzsum1_(n, x, &c__1);
-
-    for (i = 0; i < *n; ++i) {
-	d__1 = z_abs(&x[i]);
-	if (d__1 > safmin) {
-	    d__1 = 1 / d__1;
-	    x[i].r *= d__1;
-	    x[i].i *= d__1;
-	} else {
-	    x[i] = one;
-	}
-    }
-    *kase = 2;
-    jump = 2;
-    return 0;
-
-    /*     ................ ENTRY   (JUMP = 2)   
-	   FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. */
-L40:
-    j = izmax1_(n, &x[0], &c__1);
-    --j;
-    iter = 2;
-
-    /*     MAIN LOOP - ITERATIONS 2,3,...,ITMAX. */
-L50:
-    for (i = 0; i < *n; ++i) x[i] = zero;
-    x[j] = one;
-    *kase = 1;
-    jump = 3;
-    return 0;
-
-    /*     ................ ENTRY   (JUMP = 3)   
-	   X HAS BEEN OVERWRITTEN BY A*X. */
-L70:
-#ifdef _CRAY
-    CCOPY(n, x, &c__1, v, &c__1);
-#else
-    zcopy_(n, x, &c__1, v, &c__1);
-#endif
-    estold = *est;
-    *est = dzsum1_(n, v, &c__1);
-
-
-L90:
-    /*     TEST FOR CYCLING. */
-    if (*est <= estold) goto L120;
-
-    for (i = 0; i < *n; ++i) {
-	d__1 = z_abs(&x[i]);
-	if (d__1 > safmin) {
-	    d__1 = 1 / d__1;
-	    x[i].r *= d__1;
-	    x[i].i *= d__1;
-	} else {
-	    x[i] = one;
-	}
-    }
-    *kase = 2;
-    jump = 4;
-    return 0;
-
-    /*     ................ ENTRY   (JUMP = 4)   
-	   X HAS BEEN OVERWRITTEN BY TRANDPOSE(A)*X. */
-L110:
-    jlast = j;
-    j = izmax1_(n, &x[0], &c__1);
-    --j;
-    if (x[jlast].r != (d__1 = x[j].r, fabs(d__1)) && iter < 5) {
-	++iter;
-	goto L50;
-    }
-
-    /*     ITERATION COMPLETE.  FINAL STAGE. */
-L120:
-    altsgn = 1.;
-    for (i = 1; i <= *n; ++i) {
-	x[i-1].r = altsgn * ((double)(i - 1) / (double)(*n - 1) + 1.);
-	x[i-1].i = 0.;
-	altsgn = -altsgn;
-    }
-    *kase = 1;
-    jump = 5;
-    return 0;
-    
-    /*     ................ ENTRY   (JUMP = 5)   
-	   X HAS BEEN OVERWRITTEN BY A*X. */
-L140:
-    temp = dzsum1_(n, x, &c__1) / (double)(*n * 3) * 2.;
-    if (temp > *est) {
-#ifdef _CRAY
-	CCOPY(n, &x[0], &c__1, &v[0], &c__1);
-#else
-	zcopy_(n, &x[0], &c__1, &v[0], &c__1);
-#endif
-	*est = temp;
-    }
-
-L150:
-    *kase = 0;
-    return 0;
-
-} /* zlacon_ */
diff --git a/thirdparty/superlu/SuperLU_4.1/TESTING/MATGEN/Cnames.h.bak b/thirdparty/superlu/SuperLU_4.1/TESTING/MATGEN/Cnames.h.bak
deleted file mode 100644
index 28eecfa..0000000
--- a/thirdparty/superlu/SuperLU_4.1/TESTING/MATGEN/Cnames.h.bak
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 1, 1997
- *
- */
-#ifndef __SUPERLU_CNAMES /* allow multiple inclusions */
-#define __SUPERLU_CNAMES
-
-/*
- * These macros define how C routines will be called.  ADD_ assumes that
- * they will be called by fortran, which expects C routines to have an
- * underscore postfixed to the name (Suns, and the Intel expect this).
- * NOCHANGE indicates that fortran will be calling, and that it expects
- * the name called by fortran to be identical to that compiled by the C
- * (RS6K's do this).  UPCASE says it expects C routines called by fortran
- * to be in all upcase (CRAY wants this). 
- */
-
-#define ADD_       0
-#define NOCHANGE   1
-#define UPCASE     2
-#define C_CALL     3
-
-#ifdef UpCase
-#define F77_CALL_C UPCASE
-#endif
-
-#ifdef NoChange
-#define F77_CALL_C NOCHANGE
-#endif
-
-#ifdef Add_
-#define F77_CALL_C ADD_
-#endif
-
-#ifndef F77_CALL_C
-#define F77_CALL_C ADD_
-#endif
-
-#if (F77_CALL_C == ADD_)
-/*
- * These defines set up the naming scheme required to have a fortran 77
- * routine call a C routine
- * No redefinition necessary to have following Fortran to C interface:
- *           FORTRAN CALL               C DECLARATION
- *           call dgemm(...)           void dgemm_(...)
- *
- * This is the default.
- */
-
-#endif
-
-#if (F77_CALL_C == UPCASE)
-/*
- * These defines set up the naming scheme required to have a fortran 77
- * routine call a C routine 
- * following Fortran to C interface:
- *           FORTRAN CALL               C DECLARATION
- *           call dgemm(...)           void DGEMM(...)
- */
-#define sasum_    SASUM
-#define isamax_   ISAMAX
-#define scopy_    SCOPY
-#define sscal_    SSCAL
-#define sger_     SGER
-#define snrm2_    SNRM2
-#define ssymv_    SSYMV
-#define sdot_     SDOT
-#define saxpy_    SAXPY
-#define ssyr2_    SSYR2
-#define srot_     SROT
-#define sgemv_    SGEMV
-#define strsv_    STRSV
-#define sgemm_    SGEMM
-#define strsm_    STRSM
-
-#define dasum_    SASUM
-#define idamax_   ISAMAX
-#define dcopy_    SCOPY
-#define dscal_    SSCAL
-#define dger_     SGER
-#define dnrm2_    SNRM2
-#define dsymv_    SSYMV
-#define ddot_     SDOT
-#define daxpy_    SAXPY
-#define dsyr2_    SSYR2
-#define drot_     SROT
-#define dgemv_    SGEMV
-#define dtrsv_    STRSV
-#define dgemm_    SGEMM
-#define dtrsm_    STRSM
-
-#define scasum_   SCASUM
-#define icamax_   ICAMAX
-#define ccopy_    CCOPY
-#define cscal_    CSCAL
-#define scnrm2_   SCNRM2
-#define caxpy_    CAXPY
-#define cgemv_    CGEMV
-#define ctrsv_    CTRSV
-#define cgemm_    CGEMM
-#define ctrsm_    CTRSM
-#define cgerc_    CGERC
-#define chemv_    CHEMV
-#define cher2_    CHER2
-
-#define dzasum_   SCASUM
-#define izamax_   ICAMAX
-#define zcopy_    CCOPY
-#define zscal_    CSCAL
-#define dznrm2_   SCNRM2
-#define zaxpy_    CAXPY
-#define zgemv_    CGEMV
-#define ztrsv_    CTRSV
-#define zgemm_    CGEMM
-#define ztrsm_    CTRSM
-#define zgerc_    CGERC
-#define zhemv_    CHEMV
-#define zher2_    CHER2
-
-#define c_bridge_dgssv_ C_BRIDGE_DGSSV
-#endif
-
-#if (F77_CALL_C == NOCHANGE)
-/*
- * These defines set up the naming scheme required to have a fortran 77
- * routine call a C routine 
- * for following Fortran to C interface:
- *           FORTRAN CALL               C DECLARATION
- *           call dgemm(...)           void dgemm(...)
- */
-#define sasum_    sasum
-#define isamax_   isamax
-#define scopy_    scopy
-#define sscal_    sscal
-#define sger_     sger
-#define snrm2_    snrm2
-#define ssymv_    ssymv
-#define sdot_     sdot
-#define saxpy_    saxpy
-#define ssyr2_    ssyr2
-#define srot_     srot
-#define sgemv_    sgemv
-#define strsv_    strsv
-#define sgemm_    sgemm
-#define strsm_    strsm
-
-#define dasum_    dasum
-#define idamax_   idamax
-#define dcopy_    dcopy
-#define dscal_    dscal
-#define dger_     dger
-#define dnrm2_    dnrm2
-#define dsymv_    dsymv
-#define ddot_     ddot
-#define daxpy_    daxpy
-#define dsyr2_    dsyr2
-#define drot_     drot
-#define dgemv_    dgemv
-#define dtrsv_    dtrsv
-#define dgemm_    dgemm
-#define dtrsm_    dtrsm
-
-#define scasum_   scasum
-#define icamax_   icamax
-#define ccopy_    ccopy
-#define cscal_    cscal
-#define scnrm2_   scnrm2
-#define caxpy_    caxpy
-#define cgemv_    cgemv
-#define ctrsv_    ctrsv
-#define cgemm_    cgemm
-#define ctrsm_    ctrsm
-#define cgerc_    cgerc
-#define chemv_    chemv
-#define cher2_    cher2
-
-#define dzasum_   dzasum
-#define izamax_   izamax
-#define zcopy_    zcopy
-#define zscal_    zscal
-#define dznrm2_   dznrm2
-#define zaxpy_    zaxpy
-#define zgemv_    zgemv
-#define ztrsv_    ztrsv
-#define zgemm_    zgemm
-#define ztrsm_    ztrsm
-#define zgerc_    zgerc
-#define zhemv_    zhemv
-#define zher2_    zher2
-
-#define c_bridge_dgssv_ c_bridge_dgssv
-#endif
-
-#endif /* __SUPERLU_CNAMES */