|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! @file ilu_cdrop_row.c
|
|
kusano |
7d535a |
* \brief Drop small rows from L
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* -- SuperLU routine (version 4.1) --
|
|
kusano |
7d535a |
* Lawrence Berkeley National Laboratory.
|
|
kusano |
7d535a |
* June 30, 2009
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
#include <math.h></math.h>
|
|
kusano |
7d535a |
#include <stdlib.h></stdlib.h>
|
|
kusano |
7d535a |
#include "slu_cdefs.h"
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
extern void cswap_(int *, complex [], int *, complex [], int *);
|
|
kusano |
7d535a |
extern void caxpy_(int *, complex *, complex [], int *, complex [], int *);
|
|
kusano |
7d535a |
extern void ccopy_(int *, complex [], int *, complex [], int *);
|
|
kusano |
7d535a |
extern float scasum_(int *, complex *, int *);
|
|
kusano |
7d535a |
extern float scnrm2_(int *, complex *, int *);
|
|
kusano |
7d535a |
extern double dnrm2_(int *, double [], int *);
|
|
kusano |
7d535a |
extern int icamax_(int *, complex [], int *);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
static float *A; /* used in _compare_ only */
|
|
kusano |
7d535a |
static int _compare_(const void *a, const void *b)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
register int *x = (int *)a, *y = (int *)b;
|
|
kusano |
7d535a |
if (A[*x] - A[*y] > 0.0) return -1;
|
|
kusano |
7d535a |
else if (A[*x] - A[*y] < 0.0) return 1;
|
|
kusano |
7d535a |
else return 0;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! \brief
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* Purpose
|
|
kusano |
7d535a |
* =======
|
|
kusano |
7d535a |
* ilu_cdrop_row() - Drop some small rows from the previous
|
|
kusano |
7d535a |
* supernode (L-part only).
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
int ilu_cdrop_row(
|
|
kusano |
7d535a |
superlu_options_t *options, /* options */
|
|
kusano |
7d535a |
int first, /* index of the first column in the supernode */
|
|
kusano |
7d535a |
int last, /* index of the last column in the supernode */
|
|
kusano |
7d535a |
double drop_tol, /* dropping parameter */
|
|
kusano |
7d535a |
int quota, /* maximum nonzero entries allowed */
|
|
kusano |
7d535a |
int *nnzLj, /* in/out number of nonzeros in L(:, 1:last) */
|
|
kusano |
7d535a |
double *fill_tol, /* in/out - on exit, fill_tol=-num_zero_pivots,
|
|
kusano |
7d535a |
* does not change if options->ILU_MILU != SMILU1 */
|
|
kusano |
7d535a |
GlobalLU_t *Glu, /* modified */
|
|
kusano |
7d535a |
float swork[], /* working space
|
|
kusano |
7d535a |
* the length of swork[] should be no less than
|
|
kusano |
7d535a |
* the number of rows in the supernode */
|
|
kusano |
7d535a |
float swork2[], /* working space with the same size as swork[],
|
|
kusano |
7d535a |
* used only by the second dropping rule */
|
|
kusano |
7d535a |
int lastc /* if lastc == 0, there is nothing after the
|
|
kusano |
7d535a |
* working supernode [first:last];
|
|
kusano |
7d535a |
* if lastc == 1, there is one more column after
|
|
kusano |
7d535a |
* the working supernode. */ )
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
register int i, j, k, m1;
|
|
kusano |
7d535a |
register int nzlc; /* number of nonzeros in column last+1 */
|
|
kusano |
7d535a |
register int xlusup_first, xlsub_first;
|
|
kusano |
7d535a |
int m, n; /* m x n is the size of the supernode */
|
|
kusano |
7d535a |
int r = 0; /* number of dropped rows */
|
|
kusano |
7d535a |
register float *temp;
|
|
kusano |
7d535a |
register complex *lusup = Glu->lusup;
|
|
kusano |
7d535a |
register int *lsub = Glu->lsub;
|
|
kusano |
7d535a |
register int *xlsub = Glu->xlsub;
|
|
kusano |
7d535a |
register int *xlusup = Glu->xlusup;
|
|
kusano |
7d535a |
register float d_max = 0.0, d_min = 1.0;
|
|
kusano |
7d535a |
int drop_rule = options->ILU_DropRule;
|
|
kusano |
7d535a |
milu_t milu = options->ILU_MILU;
|
|
kusano |
7d535a |
norm_t nrm = options->ILU_Norm;
|
|
kusano |
7d535a |
complex zero = {0.0, 0.0};
|
|
kusano |
7d535a |
complex one = {1.0, 0.0};
|
|
kusano |
7d535a |
complex none = {-1.0, 0.0};
|
|
kusano |
7d535a |
int i_1 = 1;
|
|
kusano |
7d535a |
int inc_diag; /* inc_diag = m + 1 */
|
|
kusano |
7d535a |
int nzp = 0; /* number of zero pivots */
|
|
kusano |
7d535a |
float alpha = pow((double)(Glu->n), -1.0 / options->ILU_MILU_Dim);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
xlusup_first = xlusup[first];
|
|
kusano |
7d535a |
xlsub_first = xlsub[first];
|
|
kusano |
7d535a |
m = xlusup[first + 1] - xlusup_first;
|
|
kusano |
7d535a |
n = last - first + 1;
|
|
kusano |
7d535a |
m1 = m - 1;
|
|
kusano |
7d535a |
inc_diag = m + 1;
|
|
kusano |
7d535a |
nzlc = lastc ? (xlusup[last + 2] - xlusup[last + 1]) : 0;
|
|
kusano |
7d535a |
temp = swork - n;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Quick return if nothing to do. */
|
|
kusano |
7d535a |
if (m == 0 || m == n || drop_rule == NODROP)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
*nnzLj += m * n;
|
|
kusano |
7d535a |
return 0;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* basic dropping: ILU(tau) */
|
|
kusano |
7d535a |
for (i = n; i <= m1; )
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
/* the average abs value of ith row */
|
|
kusano |
7d535a |
switch (nrm)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
case ONE_NORM:
|
|
kusano |
7d535a |
temp[i] = scasum_(&n, &lusup[xlusup_first + i], &m) / (double)n;
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case TWO_NORM:
|
|
kusano |
7d535a |
temp[i] = scnrm2_(&n, &lusup[xlusup_first + i], &m)
|
|
kusano |
7d535a |
/ sqrt((double)n);
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case INF_NORM:
|
|
kusano |
7d535a |
default:
|
|
kusano |
7d535a |
k = icamax_(&n, &lusup[xlusup_first + i], &m) - 1;
|
|
kusano |
7d535a |
temp[i] = c_abs1(&lusup[xlusup_first + i + m * k]);
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* drop small entries due to drop_tol */
|
|
kusano |
7d535a |
if (drop_rule & DROP_BASIC && temp[i] < drop_tol)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
r++;
|
|
kusano |
7d535a |
/* drop the current row and move the last undropped row here */
|
|
kusano |
7d535a |
if (r > 1) /* add to last row */
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
/* accumulate the sum (for MILU) */
|
|
kusano |
7d535a |
switch (milu)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
case SMILU_1:
|
|
kusano |
7d535a |
case SMILU_2:
|
|
kusano |
7d535a |
caxpy_(&n, &one, &lusup[xlusup_first + i], &m,
|
|
kusano |
7d535a |
&lusup[xlusup_first + m - 1], &m);
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case SMILU_3:
|
|
kusano |
7d535a |
for (j = 0; j < n; j++)
|
|
kusano |
7d535a |
lusup[xlusup_first + (m - 1) + j * m].r +=
|
|
kusano |
7d535a |
c_abs1(&lusup[xlusup_first + i + j * m]);
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case SILU:
|
|
kusano |
7d535a |
default:
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
ccopy_(&n, &lusup[xlusup_first + m1], &m,
|
|
kusano |
7d535a |
&lusup[xlusup_first + i], &m);
|
|
kusano |
7d535a |
} /* if (r > 1) */
|
|
kusano |
7d535a |
else /* move to last row */
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
cswap_(&n, &lusup[xlusup_first + m1], &m,
|
|
kusano |
7d535a |
&lusup[xlusup_first + i], &m);
|
|
kusano |
7d535a |
if (milu == SMILU_3)
|
|
kusano |
7d535a |
for (j = 0; j < n; j++) {
|
|
kusano |
7d535a |
lusup[xlusup_first + m1 + j * m].r =
|
|
kusano |
7d535a |
c_abs1(&lusup[xlusup_first + m1 + j * m]);
|
|
kusano |
7d535a |
lusup[xlusup_first + m1 + j * m].i = 0.0;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
lsub[xlsub_first + i] = lsub[xlsub_first + m1];
|
|
kusano |
7d535a |
m1--;
|
|
kusano |
7d535a |
continue;
|
|
kusano |
7d535a |
} /* if dropping */
|
|
kusano |
7d535a |
else
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
if (temp[i] > d_max) d_max = temp[i];
|
|
kusano |
7d535a |
if (temp[i] < d_min) d_min = temp[i];
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
i++;
|
|
kusano |
7d535a |
} /* for */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Secondary dropping: drop more rows according to the quota. */
|
|
kusano |
7d535a |
quota = ceil((double)quota / (double)n);
|
|
kusano |
7d535a |
if (drop_rule & DROP_SECONDARY && m - r > quota)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
register double tol = d_max;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Calculate the second dropping tolerance */
|
|
kusano |
7d535a |
if (quota > n)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
if (drop_rule & DROP_INTERP) /* by interpolation */
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
d_max = 1.0 / d_max; d_min = 1.0 / d_min;
|
|
kusano |
7d535a |
tol = 1.0 / (d_max + (d_min - d_max) * quota / (m - n - r));
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
else /* by quick select */
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
int len = m1 - n + 1;
|
|
kusano |
7d535a |
scopy_(&len, swork, &i_1, swork2, &i_1);
|
|
kusano |
7d535a |
tol = sqselect(len, swork2, quota - n);
|
|
kusano |
7d535a |
#if 0
|
|
kusano |
7d535a |
register int *itemp = iwork - n;
|
|
kusano |
7d535a |
A = temp;
|
|
kusano |
7d535a |
for (i = n; i <= m1; i++) itemp[i] = i;
|
|
kusano |
7d535a |
qsort(iwork, m1 - n + 1, sizeof(int), _compare_);
|
|
kusano |
7d535a |
tol = temp[itemp[quota]];
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
for (i = n; i <= m1; )
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
if (temp[i] <= tol)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
register int j;
|
|
kusano |
7d535a |
r++;
|
|
kusano |
7d535a |
/* drop the current row and move the last undropped row here */
|
|
kusano |
7d535a |
if (r > 1) /* add to last row */
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
/* accumulate the sum (for MILU) */
|
|
kusano |
7d535a |
switch (milu)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
case SMILU_1:
|
|
kusano |
7d535a |
case SMILU_2:
|
|
kusano |
7d535a |
caxpy_(&n, &one, &lusup[xlusup_first + i], &m,
|
|
kusano |
7d535a |
&lusup[xlusup_first + m - 1], &m);
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case SMILU_3:
|
|
kusano |
7d535a |
for (j = 0; j < n; j++)
|
|
kusano |
7d535a |
lusup[xlusup_first + (m - 1) + j * m].r +=
|
|
kusano |
7d535a |
c_abs1(&lusup[xlusup_first + i + j * m]);
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case SILU:
|
|
kusano |
7d535a |
default:
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
ccopy_(&n, &lusup[xlusup_first + m1], &m,
|
|
kusano |
7d535a |
&lusup[xlusup_first + i], &m);
|
|
kusano |
7d535a |
} /* if (r > 1) */
|
|
kusano |
7d535a |
else /* move to last row */
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
cswap_(&n, &lusup[xlusup_first + m1], &m,
|
|
kusano |
7d535a |
&lusup[xlusup_first + i], &m);
|
|
kusano |
7d535a |
if (milu == SMILU_3)
|
|
kusano |
7d535a |
for (j = 0; j < n; j++) {
|
|
kusano |
7d535a |
lusup[xlusup_first + m1 + j * m].r =
|
|
kusano |
7d535a |
c_abs1(&lusup[xlusup_first + m1 + j * m]);
|
|
kusano |
7d535a |
lusup[xlusup_first + m1 + j * m].i = 0.0;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
lsub[xlsub_first + i] = lsub[xlsub_first + m1];
|
|
kusano |
7d535a |
m1--;
|
|
kusano |
7d535a |
temp[i] = temp[m1];
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
continue;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
i++;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
} /* for */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
} /* if secondary dropping */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
for (i = n; i < m; i++) temp[i] = 0.0;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if (r == 0)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
*nnzLj += m * n;
|
|
kusano |
7d535a |
return 0;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* add dropped entries to the diagnal */
|
|
kusano |
7d535a |
if (milu != SILU)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
register int j;
|
|
kusano |
7d535a |
complex t;
|
|
kusano |
7d535a |
float omega;
|
|
kusano |
7d535a |
for (j = 0; j < n; j++)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
t = lusup[xlusup_first + (m - 1) + j * m];
|
|
kusano |
7d535a |
if (t.r == 0.0 && t.i == 0.0) continue;
|
|
kusano |
7d535a |
omega = SUPERLU_MIN(2.0 * (1.0 - alpha) / c_abs1(&t), 1.0);
|
|
kusano |
7d535a |
cs_mult(&t, &t, omega);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
switch (milu)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
case SMILU_1:
|
|
kusano |
7d535a |
if ( !(c_eq(&t, &none)) ) {
|
|
kusano |
7d535a |
c_add(&t, &t, &one);
|
|
kusano |
7d535a |
cc_mult(&lusup[xlusup_first + j * inc_diag],
|
|
kusano |
7d535a |
&lusup[xlusup_first + j * inc_diag],
|
|
kusano |
7d535a |
&t);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
else
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
cs_mult(
|
|
kusano |
7d535a |
&lusup[xlusup_first + j * inc_diag],
|
|
kusano |
7d535a |
&lusup[xlusup_first + j * inc_diag],
|
|
kusano |
7d535a |
*fill_tol);
|
|
kusano |
7d535a |
#ifdef DEBUG
|
|
kusano |
7d535a |
printf("[1] ZERO PIVOT: FILL col %d.\n", first + j);
|
|
kusano |
7d535a |
fflush(stdout);
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
nzp++;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case SMILU_2:
|
|
kusano |
7d535a |
cs_mult(&lusup[xlusup_first + j * inc_diag],
|
|
kusano |
7d535a |
&lusup[xlusup_first + j * inc_diag],
|
|
kusano |
7d535a |
1.0 + c_abs1(&t));
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case SMILU_3:
|
|
kusano |
7d535a |
c_add(&t, &t, &one);
|
|
kusano |
7d535a |
cc_mult(&lusup[xlusup_first + j * inc_diag],
|
|
kusano |
7d535a |
&lusup[xlusup_first + j * inc_diag],
|
|
kusano |
7d535a |
&t);
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case SILU:
|
|
kusano |
7d535a |
default:
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
if (nzp > 0) *fill_tol = -nzp;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Remove dropped entries from the memory and fix the pointers. */
|
|
kusano |
7d535a |
m1 = m - r;
|
|
kusano |
7d535a |
for (j = 1; j < n; j++)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
register int tmp1, tmp2;
|
|
kusano |
7d535a |
tmp1 = xlusup_first + j * m1;
|
|
kusano |
7d535a |
tmp2 = xlusup_first + j * m;
|
|
kusano |
7d535a |
for (i = 0; i < m1; i++)
|
|
kusano |
7d535a |
lusup[i + tmp1] = lusup[i + tmp2];
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
for (i = 0; i < nzlc; i++)
|
|
kusano |
7d535a |
lusup[xlusup_first + i + n * m1] = lusup[xlusup_first + i + n * m];
|
|
kusano |
7d535a |
for (i = 0; i < nzlc; i++)
|
|
kusano |
7d535a |
lsub[xlsub[last + 1] - r + i] = lsub[xlsub[last + 1] + i];
|
|
kusano |
7d535a |
for (i = first + 1; i <= last + 1; i++)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
xlusup[i] -= r * (i - first);
|
|
kusano |
7d535a |
xlsub[i] -= r;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
if (lastc)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
xlusup[last + 2] -= r * n;
|
|
kusano |
7d535a |
xlsub[last + 2] -= r;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
*nnzLj += (m - r) * n;
|
|
kusano |
7d535a |
return r;
|
|
kusano |
7d535a |
}
|