|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! @file ilu_zcopy_to_ucol.c
|
|
kusano |
7d535a |
* \brief Copy a computed column of U to the compressed data structure
|
|
kusano |
7d535a |
* and drop some small entries
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* -- SuperLU routine (version 4.0) --
|
|
kusano |
7d535a |
* Lawrence Berkeley National Laboratory
|
|
kusano |
7d535a |
* June 30, 2009
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
#include "slu_zdefs.h"
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
#ifdef DEBUG
|
|
kusano |
7d535a |
int num_drop_U;
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
static doublecomplex *A; /* used in _compare_ only */
|
|
kusano |
7d535a |
static int _compare_(const void *a, const void *b)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
register int *x = (int *)a, *y = (int *)b;
|
|
kusano |
7d535a |
register double xx = z_abs1(&A[*x]), yy = z_abs1(&A[*y]);
|
|
kusano |
7d535a |
if (xx > yy) return -1;
|
|
kusano |
7d535a |
else if (xx < yy) return 1;
|
|
kusano |
7d535a |
else return 0;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
int
|
|
kusano |
7d535a |
ilu_zcopy_to_ucol(
|
|
kusano |
7d535a |
int jcol, /* in */
|
|
kusano |
7d535a |
int nseg, /* in */
|
|
kusano |
7d535a |
int *segrep, /* in */
|
|
kusano |
7d535a |
int *repfnz, /* in */
|
|
kusano |
7d535a |
int *perm_r, /* in */
|
|
kusano |
7d535a |
doublecomplex *dense, /* modified - reset to zero on return */
|
|
kusano |
7d535a |
int drop_rule,/* in */
|
|
kusano |
7d535a |
milu_t milu, /* in */
|
|
kusano |
7d535a |
double drop_tol, /* in */
|
|
kusano |
7d535a |
int quota, /* maximum nonzero entries allowed */
|
|
kusano |
7d535a |
doublecomplex *sum, /* out - the sum of dropped entries */
|
|
kusano |
7d535a |
int *nnzUj, /* in - out */
|
|
kusano |
7d535a |
GlobalLU_t *Glu, /* modified */
|
|
kusano |
7d535a |
int *work /* working space with minimum size n,
|
|
kusano |
7d535a |
* used by the second dropping rule */
|
|
kusano |
7d535a |
)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
/*
|
|
kusano |
7d535a |
* Gather from SPA dense[*] to global ucol[*].
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
int ksub, krep, ksupno;
|
|
kusano |
7d535a |
int i, k, kfnz, segsze;
|
|
kusano |
7d535a |
int fsupc, isub, irow;
|
|
kusano |
7d535a |
int jsupno, nextu;
|
|
kusano |
7d535a |
int new_next, mem_error;
|
|
kusano |
7d535a |
int *xsup, *supno;
|
|
kusano |
7d535a |
int *lsub, *xlsub;
|
|
kusano |
7d535a |
doublecomplex *ucol;
|
|
kusano |
7d535a |
int *usub, *xusub;
|
|
kusano |
7d535a |
int nzumax;
|
|
kusano |
7d535a |
int m; /* number of entries in the nonzero U-segments */
|
|
kusano |
7d535a |
register double d_max = 0.0, d_min = 1.0 / dlamch_("Safe minimum");
|
|
kusano |
7d535a |
register double tmp;
|
|
kusano |
7d535a |
doublecomplex zero = {0.0, 0.0};
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
xsup = Glu->xsup;
|
|
kusano |
7d535a |
supno = Glu->supno;
|
|
kusano |
7d535a |
lsub = Glu->lsub;
|
|
kusano |
7d535a |
xlsub = Glu->xlsub;
|
|
kusano |
7d535a |
ucol = Glu->ucol;
|
|
kusano |
7d535a |
usub = Glu->usub;
|
|
kusano |
7d535a |
xusub = Glu->xusub;
|
|
kusano |
7d535a |
nzumax = Glu->nzumax;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
*sum = zero;
|
|
kusano |
7d535a |
if (drop_rule == NODROP) {
|
|
kusano |
7d535a |
drop_tol = -1.0, quota = Glu->n;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
jsupno = supno[jcol];
|
|
kusano |
7d535a |
nextu = xusub[jcol];
|
|
kusano |
7d535a |
k = nseg - 1;
|
|
kusano |
7d535a |
for (ksub = 0; ksub < nseg; ksub++) {
|
|
kusano |
7d535a |
krep = segrep[k--];
|
|
kusano |
7d535a |
ksupno = supno[krep];
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( ksupno != jsupno ) { /* Should go into ucol[] */
|
|
kusano |
7d535a |
kfnz = repfnz[krep];
|
|
kusano |
7d535a |
if ( kfnz != EMPTY ) { /* Nonzero U-segment */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
fsupc = xsup[ksupno];
|
|
kusano |
7d535a |
isub = xlsub[fsupc] + kfnz - fsupc;
|
|
kusano |
7d535a |
segsze = krep - kfnz + 1;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
new_next = nextu + segsze;
|
|
kusano |
7d535a |
while ( new_next > nzumax ) {
|
|
kusano |
7d535a |
if ((mem_error = zLUMemXpand(jcol, nextu, UCOL, &nzumax,
|
|
kusano |
7d535a |
Glu)) != 0)
|
|
kusano |
7d535a |
return (mem_error);
|
|
kusano |
7d535a |
ucol = Glu->ucol;
|
|
kusano |
7d535a |
if ((mem_error = zLUMemXpand(jcol, nextu, USUB, &nzumax,
|
|
kusano |
7d535a |
Glu)) != 0)
|
|
kusano |
7d535a |
return (mem_error);
|
|
kusano |
7d535a |
usub = Glu->usub;
|
|
kusano |
7d535a |
lsub = Glu->lsub;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
for (i = 0; i < segsze; i++) {
|
|
kusano |
7d535a |
irow = lsub[isub++];
|
|
kusano |
7d535a |
tmp = z_abs1(&dense[irow]);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* first dropping rule */
|
|
kusano |
7d535a |
if (quota > 0 && tmp >= drop_tol) {
|
|
kusano |
7d535a |
if (tmp > d_max) d_max = tmp;
|
|
kusano |
7d535a |
if (tmp < d_min) d_min = tmp;
|
|
kusano |
7d535a |
usub[nextu] = perm_r[irow];
|
|
kusano |
7d535a |
ucol[nextu] = dense[irow];
|
|
kusano |
7d535a |
nextu++;
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
switch (milu) {
|
|
kusano |
7d535a |
case SMILU_1:
|
|
kusano |
7d535a |
case SMILU_2:
|
|
kusano |
7d535a |
z_add(sum, sum, &dense[irow]);
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case SMILU_3:
|
|
kusano |
7d535a |
/* *sum += fabs(dense[irow]);*/
|
|
kusano |
7d535a |
sum->r += tmp;
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case SILU:
|
|
kusano |
7d535a |
default:
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
#ifdef DEBUG
|
|
kusano |
7d535a |
num_drop_U++;
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
dense[irow] = zero;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
} /* for each segment... */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
xusub[jcol + 1] = nextu; /* Close U[*,jcol] */
|
|
kusano |
7d535a |
m = xusub[jcol + 1] - xusub[jcol];
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* second dropping rule */
|
|
kusano |
7d535a |
if (drop_rule & DROP_SECONDARY && m > quota) {
|
|
kusano |
7d535a |
register double tol = d_max;
|
|
kusano |
7d535a |
register int m0 = xusub[jcol] + m - 1;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if (quota > 0) {
|
|
kusano |
7d535a |
if (drop_rule & DROP_INTERP) {
|
|
kusano |
7d535a |
d_max = 1.0 / d_max; d_min = 1.0 / d_min;
|
|
kusano |
7d535a |
tol = 1.0 / (d_max + (d_min - d_max) * quota / m);
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
A = &ucol[xusub[jcol]];
|
|
kusano |
7d535a |
for (i = 0; i < m; i++) work[i] = i;
|
|
kusano |
7d535a |
qsort(work, m, sizeof(int), _compare_);
|
|
kusano |
7d535a |
tol = fabs(usub[xusub[jcol] + work[quota]]);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
for (i = xusub[jcol]; i <= m0; ) {
|
|
kusano |
7d535a |
if (z_abs1(&ucol[i]) <= tol) {
|
|
kusano |
7d535a |
switch (milu) {
|
|
kusano |
7d535a |
case SMILU_1:
|
|
kusano |
7d535a |
case SMILU_2:
|
|
kusano |
7d535a |
z_add(sum, sum, &ucol[i]);
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case SMILU_3:
|
|
kusano |
7d535a |
sum->r += tmp;
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case SILU:
|
|
kusano |
7d535a |
default:
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
ucol[i] = ucol[m0];
|
|
kusano |
7d535a |
usub[i] = usub[m0];
|
|
kusano |
7d535a |
m0--;
|
|
kusano |
7d535a |
m--;
|
|
kusano |
7d535a |
#ifdef DEBUG
|
|
kusano |
7d535a |
num_drop_U++;
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
xusub[jcol + 1]--;
|
|
kusano |
7d535a |
continue;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
i++;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if (milu == SMILU_2) {
|
|
kusano |
7d535a |
sum->r = z_abs1(sum); sum->i = 0.0;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
if (milu == SMILU_3) sum->i = 0.0;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
*nnzUj += m;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
return 0;
|
|
kusano |
7d535a |
}
|