kusano 7d535a
kusano 7d535a
/*! @file ilu_zpivotL.c
kusano 7d535a
 * \brief Performs numerical pivoting
kusano 7d535a
 *
kusano 7d535a
 * 
kusano 7d535a
 * -- SuperLU routine (version 4.0) --
kusano 7d535a
 * Lawrence Berkeley National Laboratory
kusano 7d535a
 * June 30, 2009
kusano 7d535a
 * 
kusano 7d535a
 */
kusano 7d535a
kusano 7d535a
kusano 7d535a
#include <math.h></math.h>
kusano 7d535a
#include <stdlib.h></stdlib.h>
kusano 7d535a
#include "slu_zdefs.h"
kusano 7d535a
kusano 7d535a
#ifndef SGN
kusano 7d535a
#define SGN(x) ((x)>=0?1:-1)
kusano 7d535a
#endif
kusano 7d535a
kusano 7d535a
/*! \brief
kusano 7d535a
 *
kusano 7d535a
 * 
kusano 7d535a
 * Purpose
kusano 7d535a
 * =======
kusano 7d535a
 *   Performs the numerical pivoting on the current column of L,
kusano 7d535a
 *   and the CDIV operation.
kusano 7d535a
 *
kusano 7d535a
 *   Pivot policy:
kusano 7d535a
 *   (1) Compute thresh = u * max_(i>=j) abs(A_ij);
kusano 7d535a
 *   (2) IF user specifies pivot row k and abs(A_kj) >= thresh THEN
kusano 7d535a
 *	     pivot row = k;
kusano 7d535a
 *	 ELSE IF abs(A_jj) >= thresh THEN
kusano 7d535a
 *	     pivot row = j;
kusano 7d535a
 *	 ELSE
kusano 7d535a
 *	     pivot row = m;
kusano 7d535a
 *
kusano 7d535a
 *   Note: If you absolutely want to use a given pivot order, then set u=0.0.
kusano 7d535a
 *
kusano 7d535a
 *   Return value: 0	  success;
kusano 7d535a
 *		   i > 0  U(i,i) is exactly zero.
kusano 7d535a
 * 
kusano 7d535a
 */
kusano 7d535a
kusano 7d535a
int
kusano 7d535a
ilu_zpivotL(
kusano 7d535a
	const int  jcol,     /* in */
kusano 7d535a
	const double u,      /* in - diagonal pivoting threshold */
kusano 7d535a
	int	   *usepr,   /* re-use the pivot sequence given by
kusano 7d535a
			      * perm_r/iperm_r */
kusano 7d535a
	int	   *perm_r,  /* may be modified */
kusano 7d535a
	int	   diagind,  /* diagonal of Pc*A*Pc' */
kusano 7d535a
	int	   *swap,    /* in/out record the row permutation */
kusano 7d535a
	int	   *iswap,   /* in/out inverse of swap, it is the same as
kusano 7d535a
				perm_r after the factorization */
kusano 7d535a
	int	   *marker,  /* in */
kusano 7d535a
	int	   *pivrow,  /* in/out, as an input if *usepr!=0 */
kusano 7d535a
	double	   fill_tol, /* in - fill tolerance of current column
kusano 7d535a
			      * used for a singular column */
kusano 7d535a
	milu_t	   milu,     /* in */
kusano 7d535a
	doublecomplex	   drop_sum, /* in - computed in ilu_zcopy_to_ucol()
kusano 7d535a
                                (MILU only) */
kusano 7d535a
	GlobalLU_t *Glu,     /* modified - global LU data structures */
kusano 7d535a
	SuperLUStat_t *stat  /* output */
kusano 7d535a
       )
kusano 7d535a
{
kusano 7d535a
kusano 7d535a
    int		 n;	 /* number of columns */
kusano 7d535a
    int		 fsupc;  /* first column in the supernode */
kusano 7d535a
    int		 nsupc;  /* no of columns in the supernode */
kusano 7d535a
    int		 nsupr;  /* no of rows in the supernode */
kusano 7d535a
    int		 lptr;	 /* points to the starting subscript of the supernode */
kusano 7d535a
    register int	 pivptr;
kusano 7d535a
    int		 old_pivptr, diag, ptr0;
kusano 7d535a
    register double  pivmax, rtemp;
kusano 7d535a
    double	 thresh;
kusano 7d535a
    doublecomplex	 temp;
kusano 7d535a
    doublecomplex	 *lu_sup_ptr;
kusano 7d535a
    doublecomplex	 *lu_col_ptr;
kusano 7d535a
    int		 *lsub_ptr;
kusano 7d535a
    register int	 isub, icol, k, itemp;
kusano 7d535a
    int		 *lsub, *xlsub;
kusano 7d535a
    doublecomplex	 *lusup;
kusano 7d535a
    int		 *xlusup;
kusano 7d535a
    flops_t	 *ops = stat->ops;
kusano 7d535a
    int		 info;
kusano 7d535a
    doublecomplex one = {1.0, 0.0};
kusano 7d535a
kusano 7d535a
    /* Initialize pointers */
kusano 7d535a
    n	       = Glu->n;
kusano 7d535a
    lsub       = Glu->lsub;
kusano 7d535a
    xlsub      = Glu->xlsub;
kusano 7d535a
    lusup      = Glu->lusup;
kusano 7d535a
    xlusup     = Glu->xlusup;
kusano 7d535a
    fsupc      = (Glu->xsup)[(Glu->supno)[jcol]];
kusano 7d535a
    nsupc      = jcol - fsupc;		/* excluding jcol; nsupc >= 0 */
kusano 7d535a
    lptr       = xlsub[fsupc];
kusano 7d535a
    nsupr      = xlsub[fsupc+1] - lptr;
kusano 7d535a
    lu_sup_ptr = &lusup[xlusup[fsupc]]; /* start of the current supernode */
kusano 7d535a
    lu_col_ptr = &lusup[xlusup[jcol]];	/* start of jcol in the supernode */
kusano 7d535a
    lsub_ptr   = &lsub[lptr];	/* start of row indices of the supernode */
kusano 7d535a
kusano 7d535a
    /* Determine the largest abs numerical value for partial pivoting;
kusano 7d535a
       Also search for user-specified pivot, and diagonal element. */
kusano 7d535a
    pivmax = -1.0;
kusano 7d535a
    pivptr = nsupc;
kusano 7d535a
    diag = EMPTY;
kusano 7d535a
    old_pivptr = nsupc;
kusano 7d535a
    ptr0 = EMPTY;
kusano 7d535a
    for (isub = nsupc; isub < nsupr; ++isub) {
kusano 7d535a
        if (marker[lsub_ptr[isub]] > jcol)
kusano 7d535a
            continue; /* do not overlap with a later relaxed supernode */
kusano 7d535a
kusano 7d535a
	switch (milu) {
kusano 7d535a
	    case SMILU_1:
kusano 7d535a
                z_add(&temp, &lu_col_ptr[isub], &drop_sum);
kusano 7d535a
		rtemp = z_abs1(&temp);
kusano 7d535a
		break;
kusano 7d535a
	    case SMILU_2:
kusano 7d535a
	    case SMILU_3:
kusano 7d535a
                /* In this case, drop_sum contains the sum of the abs. value */
kusano 7d535a
		rtemp = z_abs1(&lu_col_ptr[isub]);
kusano 7d535a
		break;
kusano 7d535a
	    case SILU:
kusano 7d535a
	    default:
kusano 7d535a
		rtemp = z_abs1(&lu_col_ptr[isub]);
kusano 7d535a
		break;
kusano 7d535a
	}
kusano 7d535a
	if (rtemp > pivmax) { pivmax = rtemp; pivptr = isub; }
kusano 7d535a
	if (*usepr && lsub_ptr[isub] == *pivrow) old_pivptr = isub;
kusano 7d535a
	if (lsub_ptr[isub] == diagind) diag = isub;
kusano 7d535a
	if (ptr0 == EMPTY) ptr0 = isub;
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    if (milu == SMILU_2 || milu == SMILU_3) pivmax += drop_sum.r;
kusano 7d535a
kusano 7d535a
    /* Test for singularity */
kusano 7d535a
    if (pivmax < 0.0) {
kusano 7d535a
	fprintf(stderr, "[0]: jcol=%d, SINGULAR!!!\n", jcol);
kusano 7d535a
	fflush(stderr);
kusano 7d535a
	exit(1);
kusano 7d535a
    }
kusano 7d535a
    if ( pivmax == 0.0 ) {
kusano 7d535a
	if (diag != EMPTY)
kusano 7d535a
	    *pivrow = lsub_ptr[pivptr = diag];
kusano 7d535a
	else if (ptr0 != EMPTY)
kusano 7d535a
	    *pivrow = lsub_ptr[pivptr = ptr0];
kusano 7d535a
	else {
kusano 7d535a
	    /* look for the first row which does not
kusano 7d535a
	       belong to any later supernodes */
kusano 7d535a
	    for (icol = jcol; icol < n; icol++)
kusano 7d535a
		if (marker[swap[icol]] <= jcol) break;
kusano 7d535a
	    if (icol >= n) {
kusano 7d535a
		fprintf(stderr, "[1]: jcol=%d, SINGULAR!!!\n", jcol);
kusano 7d535a
		fflush(stderr);
kusano 7d535a
		exit(1);
kusano 7d535a
	    }
kusano 7d535a
kusano 7d535a
	    *pivrow = swap[icol];
kusano 7d535a
kusano 7d535a
	    /* pick up the pivot row */
kusano 7d535a
	    for (isub = nsupc; isub < nsupr; ++isub)
kusano 7d535a
		if ( lsub_ptr[isub] == *pivrow ) { pivptr = isub; break; }
kusano 7d535a
	}
kusano 7d535a
	pivmax = fill_tol;
kusano 7d535a
	lu_col_ptr[pivptr].r = pivmax;
kusano 7d535a
	lu_col_ptr[pivptr].i = 0.0;
kusano 7d535a
	*usepr = 0;
kusano 7d535a
#ifdef DEBUG
kusano 7d535a
	printf("[0] ZERO PIVOT: FILL (%d, %d).\n", *pivrow, jcol);
kusano 7d535a
	fflush(stdout);
kusano 7d535a
#endif
kusano 7d535a
	info =jcol + 1;
kusano 7d535a
    } /* if (*pivrow == 0.0) */
kusano 7d535a
    else {
kusano 7d535a
	thresh = u * pivmax;
kusano 7d535a
kusano 7d535a
	/* Choose appropriate pivotal element by our policy. */
kusano 7d535a
	if ( *usepr ) {
kusano 7d535a
	    switch (milu) {
kusano 7d535a
		case SMILU_1:
kusano 7d535a
                    z_add(&temp, &lu_col_ptr[old_pivptr], &drop_sum);
kusano 7d535a
		    rtemp = z_abs1(&temp);
kusano 7d535a
		    break;
kusano 7d535a
		case SMILU_2:
kusano 7d535a
		case SMILU_3:
kusano 7d535a
		    rtemp = z_abs1(&lu_col_ptr[old_pivptr]) + drop_sum.r;
kusano 7d535a
		    break;
kusano 7d535a
		case SILU:
kusano 7d535a
		default:
kusano 7d535a
		    rtemp = z_abs1(&lu_col_ptr[old_pivptr]);
kusano 7d535a
		    break;
kusano 7d535a
	    }
kusano 7d535a
	    if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = old_pivptr;
kusano 7d535a
	    else *usepr = 0;
kusano 7d535a
	}
kusano 7d535a
	if ( *usepr == 0 ) {
kusano 7d535a
	    /* Use diagonal pivot? */
kusano 7d535a
	    if ( diag >= 0 ) { /* diagonal exists */
kusano 7d535a
		switch (milu) {
kusano 7d535a
		    case SMILU_1:
kusano 7d535a
                        z_add(&temp, &lu_col_ptr[diag], &drop_sum);
kusano 7d535a
         	        rtemp = z_abs1(&temp);
kusano 7d535a
			break;
kusano 7d535a
		    case SMILU_2:
kusano 7d535a
		    case SMILU_3:
kusano 7d535a
			rtemp = z_abs1(&lu_col_ptr[diag]) + drop_sum.r;
kusano 7d535a
			break;
kusano 7d535a
		    case SILU:
kusano 7d535a
		    default:
kusano 7d535a
			rtemp = z_abs1(&lu_col_ptr[diag]);
kusano 7d535a
			break;
kusano 7d535a
		}
kusano 7d535a
		if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = diag;
kusano 7d535a
	    }
kusano 7d535a
	    *pivrow = lsub_ptr[pivptr];
kusano 7d535a
	}
kusano 7d535a
	info = 0;
kusano 7d535a
kusano 7d535a
	/* Reset the diagonal */
kusano 7d535a
	switch (milu) {
kusano 7d535a
	    case SMILU_1:
kusano 7d535a
		z_add(&lu_col_ptr[pivptr], &lu_col_ptr[pivptr], &drop_sum);
kusano 7d535a
		break;
kusano 7d535a
	    case SMILU_2:
kusano 7d535a
	    case SMILU_3:
kusano 7d535a
                temp = z_sgn(&lu_col_ptr[pivptr]);
kusano 7d535a
                zz_mult(&temp, &temp, &drop_sum);
kusano 7d535a
                z_add(&lu_col_ptr[pivptr], &lu_col_ptr[pivptr], &drop_sum);
kusano 7d535a
		break;
kusano 7d535a
	    case SILU:
kusano 7d535a
	    default:
kusano 7d535a
		break;
kusano 7d535a
	}
kusano 7d535a
kusano 7d535a
    } /* else */
kusano 7d535a
kusano 7d535a
    /* Record pivot row */
kusano 7d535a
    perm_r[*pivrow] = jcol;
kusano 7d535a
    if (jcol < n - 1) {
kusano 7d535a
	register int t1, t2, t;
kusano 7d535a
	t1 = iswap[*pivrow]; t2 = jcol;
kusano 7d535a
	if (t1 != t2) {
kusano 7d535a
	    t = swap[t1]; swap[t1] = swap[t2]; swap[t2] = t;
kusano 7d535a
	    t1 = swap[t1]; t2 = t;
kusano 7d535a
	    t = iswap[t1]; iswap[t1] = iswap[t2]; iswap[t2] = t;
kusano 7d535a
	}
kusano 7d535a
    } /* if (jcol < n - 1) */
kusano 7d535a
kusano 7d535a
    /* Interchange row subscripts */
kusano 7d535a
    if ( pivptr != nsupc ) {
kusano 7d535a
	itemp = lsub_ptr[pivptr];
kusano 7d535a
	lsub_ptr[pivptr] = lsub_ptr[nsupc];
kusano 7d535a
	lsub_ptr[nsupc] = itemp;
kusano 7d535a
kusano 7d535a
	/* Interchange numerical values as well, for the whole snode, such 
kusano 7d535a
	 * that L is indexed the same way as A.
kusano 7d535a
	 */
kusano 7d535a
	for (icol = 0; icol <= nsupc; icol++) {
kusano 7d535a
	    itemp = pivptr + icol * nsupr;
kusano 7d535a
	    temp = lu_sup_ptr[itemp];
kusano 7d535a
	    lu_sup_ptr[itemp] = lu_sup_ptr[nsupc + icol*nsupr];
kusano 7d535a
	    lu_sup_ptr[nsupc + icol*nsupr] = temp;
kusano 7d535a
	}
kusano 7d535a
    } /* if */
kusano 7d535a
kusano 7d535a
    /* cdiv operation */
kusano 7d535a
    ops[FACT] += 10 * (nsupr - nsupc);
kusano 7d535a
    z_div(&temp, &one, &lu_col_ptr[nsupc]);
kusano 7d535a
    for (k = nsupc+1; k < nsupr; k++) 
kusano 7d535a
	zz_mult(&lu_col_ptr[k], &lu_col_ptr[k], &temp);
kusano 7d535a
kusano 7d535a
    return info;
kusano 7d535a
}