kusano 7d535a
kusano 7d535a
/*! @file ilu_sdrop_row.c
kusano 7d535a
 * \brief Drop small rows from L
kusano 7d535a
 *
kusano 7d535a
 * 
kusano 7d535a
 * -- SuperLU routine (version 4.1) --
kusano 7d535a
 * Lawrence Berkeley National Laboratory.
kusano 7d535a
 * June 30, 2009
kusano 7d535a
 * 
kusano 7d535a
 */
kusano 7d535a
kusano 7d535a
#include <math.h></math.h>
kusano 7d535a
#include <stdlib.h></stdlib.h>
kusano 7d535a
#include "slu_sdefs.h"
kusano 7d535a
kusano 7d535a
extern void sswap_(int *, float [], int *, float [], int *);
kusano 7d535a
extern void saxpy_(int *, float *, float [], int *, float [], int *);
kusano 7d535a
extern void scopy_(int *, float [], int *, float [], int *);
kusano 7d535a
extern float sasum_(int *, float *, int *);
kusano 7d535a
extern float snrm2_(int *, float *, int *);
kusano 7d535a
extern double dnrm2_(int *, double [], int *);
kusano 7d535a
extern int isamax_(int *, float [], int *);
kusano 7d535a
kusano 7d535a
static float *A;  /* used in _compare_ only */
kusano 7d535a
static int _compare_(const void *a, const void *b)
kusano 7d535a
{
kusano 7d535a
    register int *x = (int *)a, *y = (int *)b;
kusano 7d535a
    if (A[*x] - A[*y] > 0.0) return -1;
kusano 7d535a
    else if (A[*x] - A[*y] < 0.0) return 1;
kusano 7d535a
    else return 0;
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
/*! \brief
kusano 7d535a
 * 
kusano 7d535a
 * Purpose
kusano 7d535a
 * =======
kusano 7d535a
 *    ilu_sdrop_row() - Drop some small rows from the previous 
kusano 7d535a
 *    supernode (L-part only).
kusano 7d535a
 * 
kusano 7d535a
 */
kusano 7d535a
int ilu_sdrop_row(
kusano 7d535a
	superlu_options_t *options, /* options */
kusano 7d535a
	int    first,	    /* index of the first column in the supernode */
kusano 7d535a
	int    last,	    /* index of the last column in the supernode */
kusano 7d535a
	double drop_tol,    /* dropping parameter */
kusano 7d535a
	int    quota,	    /* maximum nonzero entries allowed */
kusano 7d535a
	int    *nnzLj,	    /* in/out number of nonzeros in L(:, 1:last) */
kusano 7d535a
	double *fill_tol,   /* in/out - on exit, fill_tol=-num_zero_pivots,
kusano 7d535a
			     * does not change if options->ILU_MILU != SMILU1 */
kusano 7d535a
	GlobalLU_t *Glu,    /* modified */
kusano 7d535a
	float swork[],   /* working space
kusano 7d535a
	                     * the length of swork[] should be no less than
kusano 7d535a
			     * the number of rows in the supernode */
kusano 7d535a
	float swork2[], /* working space with the same size as swork[],
kusano 7d535a
			     * used only by the second dropping rule */
kusano 7d535a
	int    lastc	    /* if lastc == 0, there is nothing after the
kusano 7d535a
			     * working supernode [first:last];
kusano 7d535a
			     * if lastc == 1, there is one more column after
kusano 7d535a
			     * the working supernode. */ )
kusano 7d535a
{
kusano 7d535a
    register int i, j, k, m1;
kusano 7d535a
    register int nzlc; /* number of nonzeros in column last+1 */
kusano 7d535a
    register int xlusup_first, xlsub_first;
kusano 7d535a
    int m, n; /* m x n is the size of the supernode */
kusano 7d535a
    int r = 0; /* number of dropped rows */
kusano 7d535a
    register float *temp;
kusano 7d535a
    register float *lusup = Glu->lusup;
kusano 7d535a
    register int *lsub = Glu->lsub;
kusano 7d535a
    register int *xlsub = Glu->xlsub;
kusano 7d535a
    register int *xlusup = Glu->xlusup;
kusano 7d535a
    register float d_max = 0.0, d_min = 1.0;
kusano 7d535a
    int    drop_rule = options->ILU_DropRule;
kusano 7d535a
    milu_t milu = options->ILU_MILU;
kusano 7d535a
    norm_t nrm = options->ILU_Norm;
kusano 7d535a
    float zero = 0.0;
kusano 7d535a
    float one = 1.0;
kusano 7d535a
    float none = -1.0;
kusano 7d535a
    int i_1 = 1;
kusano 7d535a
    int inc_diag; /* inc_diag = m + 1 */
kusano 7d535a
    int nzp = 0;  /* number of zero pivots */
kusano 7d535a
    float alpha = pow((double)(Glu->n), -1.0 / options->ILU_MILU_Dim);
kusano 7d535a
kusano 7d535a
    xlusup_first = xlusup[first];
kusano 7d535a
    xlsub_first = xlsub[first];
kusano 7d535a
    m = xlusup[first + 1] - xlusup_first;
kusano 7d535a
    n = last - first + 1;
kusano 7d535a
    m1 = m - 1;
kusano 7d535a
    inc_diag = m + 1;
kusano 7d535a
    nzlc = lastc ? (xlusup[last + 2] - xlusup[last + 1]) : 0;
kusano 7d535a
    temp = swork - n;
kusano 7d535a
kusano 7d535a
    /* Quick return if nothing to do. */
kusano 7d535a
    if (m == 0 || m == n || drop_rule == NODROP)
kusano 7d535a
    {
kusano 7d535a
	*nnzLj += m * n;
kusano 7d535a
	return 0;
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    /* basic dropping: ILU(tau) */
kusano 7d535a
    for (i = n; i <= m1; )
kusano 7d535a
    {
kusano 7d535a
	/* the average abs value of ith row */
kusano 7d535a
	switch (nrm)
kusano 7d535a
	{
kusano 7d535a
	    case ONE_NORM:
kusano 7d535a
		temp[i] = sasum_(&n, &lusup[xlusup_first + i], &m) / (double)n;
kusano 7d535a
		break;
kusano 7d535a
	    case TWO_NORM:
kusano 7d535a
		temp[i] = snrm2_(&n, &lusup[xlusup_first + i], &m)
kusano 7d535a
		    / sqrt((double)n);
kusano 7d535a
		break;
kusano 7d535a
	    case INF_NORM:
kusano 7d535a
	    default:
kusano 7d535a
		k = isamax_(&n, &lusup[xlusup_first + i], &m) - 1;
kusano 7d535a
		temp[i] = fabs(lusup[xlusup_first + i + m * k]);
kusano 7d535a
		break;
kusano 7d535a
	}
kusano 7d535a
kusano 7d535a
	/* drop small entries due to drop_tol */
kusano 7d535a
	if (drop_rule & DROP_BASIC && temp[i] < drop_tol)
kusano 7d535a
	{
kusano 7d535a
	    r++;
kusano 7d535a
	    /* drop the current row and move the last undropped row here */
kusano 7d535a
	    if (r > 1) /* add to last row */
kusano 7d535a
	    {
kusano 7d535a
		/* accumulate the sum (for MILU) */
kusano 7d535a
		switch (milu)
kusano 7d535a
		{
kusano 7d535a
		    case SMILU_1:
kusano 7d535a
		    case SMILU_2:
kusano 7d535a
			saxpy_(&n, &one, &lusup[xlusup_first + i], &m,
kusano 7d535a
				&lusup[xlusup_first + m - 1], &m);
kusano 7d535a
			break;
kusano 7d535a
		    case SMILU_3:
kusano 7d535a
			for (j = 0; j < n; j++)
kusano 7d535a
			    lusup[xlusup_first + (m - 1) + j * m] +=
kusano 7d535a
				    fabs(lusup[xlusup_first + i + j * m]);
kusano 7d535a
			break;
kusano 7d535a
		    case SILU:
kusano 7d535a
		    default:
kusano 7d535a
			break;
kusano 7d535a
		}
kusano 7d535a
		scopy_(&n, &lusup[xlusup_first + m1], &m,
kusano 7d535a
                       &lusup[xlusup_first + i], &m);
kusano 7d535a
	    } /* if (r > 1) */
kusano 7d535a
	    else /* move to last row */
kusano 7d535a
	    {
kusano 7d535a
		sswap_(&n, &lusup[xlusup_first + m1], &m,
kusano 7d535a
			&lusup[xlusup_first + i], &m);
kusano 7d535a
		if (milu == SMILU_3)
kusano 7d535a
		    for (j = 0; j < n; j++) {
kusano 7d535a
			lusup[xlusup_first + m1 + j * m] =
kusano 7d535a
				fabs(lusup[xlusup_first + m1 + j * m]);
kusano 7d535a
                    }
kusano 7d535a
	    }
kusano 7d535a
	    lsub[xlsub_first + i] = lsub[xlsub_first + m1];
kusano 7d535a
	    m1--;
kusano 7d535a
	    continue;
kusano 7d535a
	} /* if dropping */
kusano 7d535a
	else
kusano 7d535a
	{
kusano 7d535a
	    if (temp[i] > d_max) d_max = temp[i];
kusano 7d535a
	    if (temp[i] < d_min) d_min = temp[i];
kusano 7d535a
	}
kusano 7d535a
	i++;
kusano 7d535a
    } /* for */
kusano 7d535a
kusano 7d535a
    /* Secondary dropping: drop more rows according to the quota. */
kusano 7d535a
    quota = ceil((double)quota / (double)n);
kusano 7d535a
    if (drop_rule & DROP_SECONDARY && m - r > quota)
kusano 7d535a
    {
kusano 7d535a
	register double tol = d_max;
kusano 7d535a
kusano 7d535a
	/* Calculate the second dropping tolerance */
kusano 7d535a
	if (quota > n)
kusano 7d535a
	{
kusano 7d535a
	    if (drop_rule & DROP_INTERP) /* by interpolation */
kusano 7d535a
	    {
kusano 7d535a
		d_max = 1.0 / d_max; d_min = 1.0 / d_min;
kusano 7d535a
		tol = 1.0 / (d_max + (d_min - d_max) * quota / (m - n - r));
kusano 7d535a
	    }
kusano 7d535a
	    else /* by quick select */
kusano 7d535a
	    {
kusano 7d535a
		int len = m1 - n + 1;
kusano 7d535a
		scopy_(&len, swork, &i_1, swork2, &i_1);
kusano 7d535a
		tol = sqselect(len, swork2, quota - n);
kusano 7d535a
#if 0
kusano 7d535a
		register int *itemp = iwork - n;
kusano 7d535a
		A = temp;
kusano 7d535a
		for (i = n; i <= m1; i++) itemp[i] = i;
kusano 7d535a
		qsort(iwork, m1 - n + 1, sizeof(int), _compare_);
kusano 7d535a
		tol = temp[itemp[quota]];
kusano 7d535a
#endif
kusano 7d535a
	    }
kusano 7d535a
	}
kusano 7d535a
kusano 7d535a
	for (i = n; i <= m1; )
kusano 7d535a
	{
kusano 7d535a
	    if (temp[i] <= tol)
kusano 7d535a
	    {
kusano 7d535a
		register int j;
kusano 7d535a
		r++;
kusano 7d535a
		/* drop the current row and move the last undropped row here */
kusano 7d535a
		if (r > 1) /* add to last row */
kusano 7d535a
		{
kusano 7d535a
		    /* accumulate the sum (for MILU) */
kusano 7d535a
		    switch (milu)
kusano 7d535a
		    {
kusano 7d535a
			case SMILU_1:
kusano 7d535a
			case SMILU_2:
kusano 7d535a
			    saxpy_(&n, &one, &lusup[xlusup_first + i], &m,
kusano 7d535a
				    &lusup[xlusup_first + m - 1], &m);
kusano 7d535a
			    break;
kusano 7d535a
			case SMILU_3:
kusano 7d535a
			    for (j = 0; j < n; j++)
kusano 7d535a
				lusup[xlusup_first + (m - 1) + j * m] +=
kusano 7d535a
					fabs(lusup[xlusup_first + i + j * m]);
kusano 7d535a
			    break;
kusano 7d535a
			case SILU:
kusano 7d535a
			default:
kusano 7d535a
			    break;
kusano 7d535a
		    }
kusano 7d535a
		    scopy_(&n, &lusup[xlusup_first + m1], &m,
kusano 7d535a
			    &lusup[xlusup_first + i], &m);
kusano 7d535a
		} /* if (r > 1) */
kusano 7d535a
		else /* move to last row */
kusano 7d535a
		{
kusano 7d535a
		    sswap_(&n, &lusup[xlusup_first + m1], &m,
kusano 7d535a
			    &lusup[xlusup_first + i], &m);
kusano 7d535a
		    if (milu == SMILU_3)
kusano 7d535a
			for (j = 0; j < n; j++) {
kusano 7d535a
			    lusup[xlusup_first + m1 + j * m] =
kusano 7d535a
				    fabs(lusup[xlusup_first + m1 + j * m]);
kusano 7d535a
                        }
kusano 7d535a
		}
kusano 7d535a
		lsub[xlsub_first + i] = lsub[xlsub_first + m1];
kusano 7d535a
		m1--;
kusano 7d535a
		temp[i] = temp[m1];
kusano 7d535a
kusano 7d535a
		continue;
kusano 7d535a
	    }
kusano 7d535a
	    i++;
kusano 7d535a
kusano 7d535a
	} /* for */
kusano 7d535a
kusano 7d535a
    } /* if secondary dropping */
kusano 7d535a
kusano 7d535a
    for (i = n; i < m; i++) temp[i] = 0.0;
kusano 7d535a
kusano 7d535a
    if (r == 0)
kusano 7d535a
    {
kusano 7d535a
	*nnzLj += m * n;
kusano 7d535a
	return 0;
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    /* add dropped entries to the diagnal */
kusano 7d535a
    if (milu != SILU)
kusano 7d535a
    {
kusano 7d535a
	register int j;
kusano 7d535a
	float t;
kusano 7d535a
	float omega;
kusano 7d535a
	for (j = 0; j < n; j++)
kusano 7d535a
	{
kusano 7d535a
	    t = lusup[xlusup_first + (m - 1) + j * m];
kusano 7d535a
            if (t == zero) continue;
kusano 7d535a
	    if (t > zero)
kusano 7d535a
		omega = SUPERLU_MIN(2.0 * (1.0 - alpha) / t, 1.0);
kusano 7d535a
	    else
kusano 7d535a
		omega = SUPERLU_MAX(2.0 * (1.0 - alpha) / t, -1.0);
kusano 7d535a
	    t *= omega;
kusano 7d535a
kusano 7d535a
 	    switch (milu)
kusano 7d535a
	    {
kusano 7d535a
		case SMILU_1:
kusano 7d535a
		    if (t != none) {
kusano 7d535a
			lusup[xlusup_first + j * inc_diag] *= (one + t);
kusano 7d535a
                    }
kusano 7d535a
		    else
kusano 7d535a
		    {
kusano 7d535a
			lusup[xlusup_first + j * inc_diag] *= *fill_tol;
kusano 7d535a
#ifdef DEBUG
kusano 7d535a
			printf("[1] ZERO PIVOT: FILL col %d.\n", first + j);
kusano 7d535a
			fflush(stdout);
kusano 7d535a
#endif
kusano 7d535a
			nzp++;
kusano 7d535a
		    }
kusano 7d535a
		    break;
kusano 7d535a
		case SMILU_2:
kusano 7d535a
		    lusup[xlusup_first + j * inc_diag] *= (1.0 + fabs(t));
kusano 7d535a
		    break;
kusano 7d535a
		case SMILU_3:
kusano 7d535a
		    lusup[xlusup_first + j * inc_diag] *= (one + t);
kusano 7d535a
		    break;
kusano 7d535a
		case SILU:
kusano 7d535a
		default:
kusano 7d535a
		    break;
kusano 7d535a
	    }
kusano 7d535a
	}
kusano 7d535a
	if (nzp > 0) *fill_tol = -nzp;
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    /* Remove dropped entries from the memory and fix the pointers. */
kusano 7d535a
    m1 = m - r;
kusano 7d535a
    for (j = 1; j < n; j++)
kusano 7d535a
    {
kusano 7d535a
	register int tmp1, tmp2;
kusano 7d535a
	tmp1 = xlusup_first + j * m1;
kusano 7d535a
	tmp2 = xlusup_first + j * m;
kusano 7d535a
	for (i = 0; i < m1; i++)
kusano 7d535a
	    lusup[i + tmp1] = lusup[i + tmp2];
kusano 7d535a
    }
kusano 7d535a
    for (i = 0; i < nzlc; i++)
kusano 7d535a
	lusup[xlusup_first + i + n * m1] = lusup[xlusup_first + i + n * m];
kusano 7d535a
    for (i = 0; i < nzlc; i++)
kusano 7d535a
	lsub[xlsub[last + 1] - r + i] = lsub[xlsub[last + 1] + i];
kusano 7d535a
    for (i = first + 1; i <= last + 1; i++)
kusano 7d535a
    {
kusano 7d535a
	xlusup[i] -= r * (i - first);
kusano 7d535a
	xlsub[i] -= r;
kusano 7d535a
    }
kusano 7d535a
    if (lastc)
kusano 7d535a
    {
kusano 7d535a
	xlusup[last + 2] -= r * n;
kusano 7d535a
	xlsub[last + 2] -= r;
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    *nnzLj += (m - r) * n;
kusano 7d535a
    return r;
kusano 7d535a
}