kusano 7d535a
kusano 7d535a
/*! @file ilu_scopy_to_ucol.c
kusano 7d535a
 * \brief Copy a computed column of U to the compressed data structure
kusano 7d535a
 * and drop some small entries
kusano 7d535a
 *
kusano 7d535a
 * 
kusano 7d535a
 * -- SuperLU routine (version 4.1) --
kusano 7d535a
 * Lawrence Berkeley National Laboratory
kusano 7d535a
 * November, 2010
kusano 7d535a
 * 
kusano 7d535a
 */
kusano 7d535a
kusano 7d535a
#include "slu_sdefs.h"
kusano 7d535a
kusano 7d535a
#ifdef DEBUG
kusano 7d535a
int num_drop_U;
kusano 7d535a
#endif
kusano 7d535a
kusano 7d535a
extern void scopy_(int *, float [], int *, float [], int *);
kusano 7d535a
kusano 7d535a
#if 0
kusano 7d535a
static float *A;  /* used in _compare_ only */
kusano 7d535a
static int _compare_(const void *a, const void *b)
kusano 7d535a
{
kusano 7d535a
    register int *x = (int *)a, *y = (int *)b;
kusano 7d535a
    register double xx = fabs(A[*x]), yy = fabs(A[*y]);
kusano 7d535a
    if (xx > yy) return -1;
kusano 7d535a
    else if (xx < yy) return 1;
kusano 7d535a
    else return 0;
kusano 7d535a
}
kusano 7d535a
#endif
kusano 7d535a
kusano 7d535a
int
kusano 7d535a
ilu_scopy_to_ucol(
kusano 7d535a
	      int	 jcol,	   /* in */
kusano 7d535a
	      int	 nseg,	   /* in */
kusano 7d535a
	      int	 *segrep,  /* in */
kusano 7d535a
	      int	 *repfnz,  /* in */
kusano 7d535a
	      int	 *perm_r,  /* in */
kusano 7d535a
	      float	 *dense,   /* modified - reset to zero on return */
kusano 7d535a
	      int  	 drop_rule,/* in */
kusano 7d535a
	      milu_t	 milu,	   /* in */
kusano 7d535a
	      double	 drop_tol, /* in */
kusano 7d535a
	      int	 quota,    /* maximum nonzero entries allowed */
kusano 7d535a
	      float	 *sum,	   /* out - the sum of dropped entries */
kusano 7d535a
	      int	 *nnzUj,   /* in - out */
kusano 7d535a
	      GlobalLU_t *Glu,	   /* modified */
kusano 7d535a
	      float	 *work	   /* working space with minimum size n,
kusano 7d535a
				    * used by the second dropping rule */
kusano 7d535a
	      )
kusano 7d535a
{
kusano 7d535a
/*
kusano 7d535a
 * Gather from SPA dense[*] to global ucol[*].
kusano 7d535a
 */
kusano 7d535a
    int       ksub, krep, ksupno;
kusano 7d535a
    int       i, k, kfnz, segsze;
kusano 7d535a
    int       fsupc, isub, irow;
kusano 7d535a
    int       jsupno, nextu;
kusano 7d535a
    int       new_next, mem_error;
kusano 7d535a
    int       *xsup, *supno;
kusano 7d535a
    int       *lsub, *xlsub;
kusano 7d535a
    float    *ucol;
kusano 7d535a
    int       *usub, *xusub;
kusano 7d535a
    int       nzumax;
kusano 7d535a
    int       m; /* number of entries in the nonzero U-segments */
kusano 7d535a
    register float d_max = 0.0, d_min = 1.0 / dlamch_("Safe minimum");
kusano 7d535a
    register double tmp;
kusano 7d535a
    float zero = 0.0;
kusano 7d535a
    int i_1 = 1;
kusano 7d535a
kusano 7d535a
    xsup    = Glu->xsup;
kusano 7d535a
    supno   = Glu->supno;
kusano 7d535a
    lsub    = Glu->lsub;
kusano 7d535a
    xlsub   = Glu->xlsub;
kusano 7d535a
    ucol    = Glu->ucol;
kusano 7d535a
    usub    = Glu->usub;
kusano 7d535a
    xusub   = Glu->xusub;
kusano 7d535a
    nzumax  = Glu->nzumax;
kusano 7d535a
kusano 7d535a
    *sum = zero;
kusano 7d535a
    if (drop_rule == NODROP) {
kusano 7d535a
	drop_tol = -1.0, quota = Glu->n;
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    jsupno = supno[jcol];
kusano 7d535a
    nextu  = xusub[jcol];
kusano 7d535a
    k = nseg - 1;
kusano 7d535a
    for (ksub = 0; ksub < nseg; ksub++) {
kusano 7d535a
	krep = segrep[k--];
kusano 7d535a
	ksupno = supno[krep];
kusano 7d535a
kusano 7d535a
	if ( ksupno != jsupno ) { /* Should go into ucol[] */
kusano 7d535a
	    kfnz = repfnz[krep];
kusano 7d535a
	    if ( kfnz != EMPTY ) {	/* Nonzero U-segment */
kusano 7d535a
kusano 7d535a
		fsupc = xsup[ksupno];
kusano 7d535a
		isub = xlsub[fsupc] + kfnz - fsupc;
kusano 7d535a
		segsze = krep - kfnz + 1;
kusano 7d535a
kusano 7d535a
		new_next = nextu + segsze;
kusano 7d535a
		while ( new_next > nzumax ) {
kusano 7d535a
		    if ((mem_error = sLUMemXpand(jcol, nextu, UCOL, &nzumax,
kusano 7d535a
			    Glu)) != 0)
kusano 7d535a
			return (mem_error);
kusano 7d535a
		    ucol = Glu->ucol;
kusano 7d535a
		    if ((mem_error = sLUMemXpand(jcol, nextu, USUB, &nzumax,
kusano 7d535a
			    Glu)) != 0)
kusano 7d535a
			return (mem_error);
kusano 7d535a
		    usub = Glu->usub;
kusano 7d535a
		    lsub = Glu->lsub;
kusano 7d535a
		}
kusano 7d535a
kusano 7d535a
		for (i = 0; i < segsze; i++) {
kusano 7d535a
		    irow = lsub[isub++];
kusano 7d535a
		    tmp = fabs(dense[irow]);
kusano 7d535a
kusano 7d535a
		    /* first dropping rule */
kusano 7d535a
		    if (quota > 0 && tmp >= drop_tol) {
kusano 7d535a
			if (tmp > d_max) d_max = tmp;
kusano 7d535a
			if (tmp < d_min) d_min = tmp;
kusano 7d535a
			usub[nextu] = perm_r[irow];
kusano 7d535a
			ucol[nextu] = dense[irow];
kusano 7d535a
			nextu++;
kusano 7d535a
		    } else {
kusano 7d535a
			switch (milu) {
kusano 7d535a
			    case SMILU_1:
kusano 7d535a
			    case SMILU_2:
kusano 7d535a
				*sum += dense[irow];
kusano 7d535a
				break;
kusano 7d535a
			    case SMILU_3:
kusano 7d535a
				/* *sum += fabs(dense[irow]);*/
kusano 7d535a
				*sum += tmp;
kusano 7d535a
				break;
kusano 7d535a
			    case SILU:
kusano 7d535a
			    default:
kusano 7d535a
				break;
kusano 7d535a
			}
kusano 7d535a
#ifdef DEBUG
kusano 7d535a
			num_drop_U++;
kusano 7d535a
#endif
kusano 7d535a
		    }
kusano 7d535a
		    dense[irow] = zero;
kusano 7d535a
		}
kusano 7d535a
kusano 7d535a
	    }
kusano 7d535a
kusano 7d535a
	}
kusano 7d535a
kusano 7d535a
    } /* for each segment... */
kusano 7d535a
kusano 7d535a
    xusub[jcol + 1] = nextu;	  /* Close U[*,jcol] */
kusano 7d535a
    m = xusub[jcol + 1] - xusub[jcol];
kusano 7d535a
kusano 7d535a
    /* second dropping rule */
kusano 7d535a
    if (drop_rule & DROP_SECONDARY && m > quota) {
kusano 7d535a
	register double tol = d_max;
kusano 7d535a
	register int m0 = xusub[jcol] + m - 1;
kusano 7d535a
kusano 7d535a
	if (quota > 0) {
kusano 7d535a
	    if (drop_rule & DROP_INTERP) {
kusano 7d535a
		d_max = 1.0 / d_max; d_min = 1.0 / d_min;
kusano 7d535a
		tol = 1.0 / (d_max + (d_min - d_max) * quota / m);
kusano 7d535a
	    } else {
kusano 7d535a
		scopy_(&m, &ucol[xusub[jcol]], &i_1, work, &i_1);
kusano 7d535a
		tol = sqselect(m, work, quota);
kusano 7d535a
#if 0
kusano 7d535a
		A = &ucol[xusub[jcol]];
kusano 7d535a
		for (i = 0; i < m; i++) work[i] = i;
kusano 7d535a
		qsort(work, m, sizeof(int), _compare_);
kusano 7d535a
		tol = fabs(usub[xusub[jcol] + work[quota]]);
kusano 7d535a
#endif
kusano 7d535a
	    }
kusano 7d535a
	}
kusano 7d535a
	for (i = xusub[jcol]; i <= m0; ) {
kusano 7d535a
	    if (fabs(ucol[i]) <= tol) {
kusano 7d535a
		switch (milu) {
kusano 7d535a
		    case SMILU_1:
kusano 7d535a
		    case SMILU_2:
kusano 7d535a
			*sum += ucol[i];
kusano 7d535a
			break;
kusano 7d535a
		    case SMILU_3:
kusano 7d535a
			*sum += fabs(ucol[i]);
kusano 7d535a
			break;
kusano 7d535a
		    case SILU:
kusano 7d535a
		    default:
kusano 7d535a
			break;
kusano 7d535a
		}
kusano 7d535a
		ucol[i] = ucol[m0];
kusano 7d535a
		usub[i] = usub[m0];
kusano 7d535a
		m0--;
kusano 7d535a
		m--;
kusano 7d535a
#ifdef DEBUG
kusano 7d535a
		num_drop_U++;
kusano 7d535a
#endif
kusano 7d535a
		xusub[jcol + 1]--;
kusano 7d535a
		continue;
kusano 7d535a
	    }
kusano 7d535a
	    i++;
kusano 7d535a
	}
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    if (milu == SMILU_2) *sum = fabs(*sum);
kusano 7d535a
kusano 7d535a
    *nnzUj += m;
kusano 7d535a
kusano 7d535a
    return 0;
kusano 7d535a
}