|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! @file ilu_zpivotL.c
|
|
kusano |
7d535a |
* \brief Performs numerical pivoting
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* -- SuperLU routine (version 4.0) --
|
|
kusano |
7d535a |
* Lawrence Berkeley National Laboratory
|
|
kusano |
7d535a |
* June 30, 2009
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
#include <math.h></math.h>
|
|
kusano |
7d535a |
#include <stdlib.h></stdlib.h>
|
|
kusano |
7d535a |
#include "slu_zdefs.h"
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
#ifndef SGN
|
|
kusano |
7d535a |
#define SGN(x) ((x)>=0?1:-1)
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! \brief
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* Purpose
|
|
kusano |
7d535a |
* =======
|
|
kusano |
7d535a |
* Performs the numerical pivoting on the current column of L,
|
|
kusano |
7d535a |
* and the CDIV operation.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* Pivot policy:
|
|
kusano |
7d535a |
* (1) Compute thresh = u * max_(i>=j) abs(A_ij);
|
|
kusano |
7d535a |
* (2) IF user specifies pivot row k and abs(A_kj) >= thresh THEN
|
|
kusano |
7d535a |
* pivot row = k;
|
|
kusano |
7d535a |
* ELSE IF abs(A_jj) >= thresh THEN
|
|
kusano |
7d535a |
* pivot row = j;
|
|
kusano |
7d535a |
* ELSE
|
|
kusano |
7d535a |
* pivot row = m;
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* Note: If you absolutely want to use a given pivot order, then set u=0.0.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* Return value: 0 success;
|
|
kusano |
7d535a |
* i > 0 U(i,i) is exactly zero.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
int
|
|
kusano |
7d535a |
ilu_zpivotL(
|
|
kusano |
7d535a |
const int jcol, /* in */
|
|
kusano |
7d535a |
const double u, /* in - diagonal pivoting threshold */
|
|
kusano |
7d535a |
int *usepr, /* re-use the pivot sequence given by
|
|
kusano |
7d535a |
* perm_r/iperm_r */
|
|
kusano |
7d535a |
int *perm_r, /* may be modified */
|
|
kusano |
7d535a |
int diagind, /* diagonal of Pc*A*Pc' */
|
|
kusano |
7d535a |
int *swap, /* in/out record the row permutation */
|
|
kusano |
7d535a |
int *iswap, /* in/out inverse of swap, it is the same as
|
|
kusano |
7d535a |
perm_r after the factorization */
|
|
kusano |
7d535a |
int *marker, /* in */
|
|
kusano |
7d535a |
int *pivrow, /* in/out, as an input if *usepr!=0 */
|
|
kusano |
7d535a |
double fill_tol, /* in - fill tolerance of current column
|
|
kusano |
7d535a |
* used for a singular column */
|
|
kusano |
7d535a |
milu_t milu, /* in */
|
|
kusano |
7d535a |
doublecomplex drop_sum, /* in - computed in ilu_zcopy_to_ucol()
|
|
kusano |
7d535a |
(MILU only) */
|
|
kusano |
7d535a |
GlobalLU_t *Glu, /* modified - global LU data structures */
|
|
kusano |
7d535a |
SuperLUStat_t *stat /* output */
|
|
kusano |
7d535a |
)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
int n; /* number of columns */
|
|
kusano |
7d535a |
int fsupc; /* first column in the supernode */
|
|
kusano |
7d535a |
int nsupc; /* no of columns in the supernode */
|
|
kusano |
7d535a |
int nsupr; /* no of rows in the supernode */
|
|
kusano |
7d535a |
int lptr; /* points to the starting subscript of the supernode */
|
|
kusano |
7d535a |
register int pivptr;
|
|
kusano |
7d535a |
int old_pivptr, diag, ptr0;
|
|
kusano |
7d535a |
register double pivmax, rtemp;
|
|
kusano |
7d535a |
double thresh;
|
|
kusano |
7d535a |
doublecomplex temp;
|
|
kusano |
7d535a |
doublecomplex *lu_sup_ptr;
|
|
kusano |
7d535a |
doublecomplex *lu_col_ptr;
|
|
kusano |
7d535a |
int *lsub_ptr;
|
|
kusano |
7d535a |
register int isub, icol, k, itemp;
|
|
kusano |
7d535a |
int *lsub, *xlsub;
|
|
kusano |
7d535a |
doublecomplex *lusup;
|
|
kusano |
7d535a |
int *xlusup;
|
|
kusano |
7d535a |
flops_t *ops = stat->ops;
|
|
kusano |
7d535a |
int info;
|
|
kusano |
7d535a |
doublecomplex one = {1.0, 0.0};
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Initialize pointers */
|
|
kusano |
7d535a |
n = Glu->n;
|
|
kusano |
7d535a |
lsub = Glu->lsub;
|
|
kusano |
7d535a |
xlsub = Glu->xlsub;
|
|
kusano |
7d535a |
lusup = Glu->lusup;
|
|
kusano |
7d535a |
xlusup = Glu->xlusup;
|
|
kusano |
7d535a |
fsupc = (Glu->xsup)[(Glu->supno)[jcol]];
|
|
kusano |
7d535a |
nsupc = jcol - fsupc; /* excluding jcol; nsupc >= 0 */
|
|
kusano |
7d535a |
lptr = xlsub[fsupc];
|
|
kusano |
7d535a |
nsupr = xlsub[fsupc+1] - lptr;
|
|
kusano |
7d535a |
lu_sup_ptr = &lusup[xlusup[fsupc]]; /* start of the current supernode */
|
|
kusano |
7d535a |
lu_col_ptr = &lusup[xlusup[jcol]]; /* start of jcol in the supernode */
|
|
kusano |
7d535a |
lsub_ptr = &lsub[lptr]; /* start of row indices of the supernode */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Determine the largest abs numerical value for partial pivoting;
|
|
kusano |
7d535a |
Also search for user-specified pivot, and diagonal element. */
|
|
kusano |
7d535a |
pivmax = -1.0;
|
|
kusano |
7d535a |
pivptr = nsupc;
|
|
kusano |
7d535a |
diag = EMPTY;
|
|
kusano |
7d535a |
old_pivptr = nsupc;
|
|
kusano |
7d535a |
ptr0 = EMPTY;
|
|
kusano |
7d535a |
for (isub = nsupc; isub < nsupr; ++isub) {
|
|
kusano |
7d535a |
if (marker[lsub_ptr[isub]] > jcol)
|
|
kusano |
7d535a |
continue; /* do not overlap with a later relaxed supernode */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
switch (milu) {
|
|
kusano |
7d535a |
case SMILU_1:
|
|
kusano |
7d535a |
z_add(&temp, &lu_col_ptr[isub], &drop_sum);
|
|
kusano |
7d535a |
rtemp = z_abs1(&temp);
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case SMILU_2:
|
|
kusano |
7d535a |
case SMILU_3:
|
|
kusano |
7d535a |
/* In this case, drop_sum contains the sum of the abs. value */
|
|
kusano |
7d535a |
rtemp = z_abs1(&lu_col_ptr[isub]);
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case SILU:
|
|
kusano |
7d535a |
default:
|
|
kusano |
7d535a |
rtemp = z_abs1(&lu_col_ptr[isub]);
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
if (rtemp > pivmax) { pivmax = rtemp; pivptr = isub; }
|
|
kusano |
7d535a |
if (*usepr && lsub_ptr[isub] == *pivrow) old_pivptr = isub;
|
|
kusano |
7d535a |
if (lsub_ptr[isub] == diagind) diag = isub;
|
|
kusano |
7d535a |
if (ptr0 == EMPTY) ptr0 = isub;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if (milu == SMILU_2 || milu == SMILU_3) pivmax += drop_sum.r;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Test for singularity */
|
|
kusano |
7d535a |
if (pivmax < 0.0) {
|
|
kusano |
7d535a |
fprintf(stderr, "[0]: jcol=%d, SINGULAR!!!\n", jcol);
|
|
kusano |
7d535a |
fflush(stderr);
|
|
kusano |
7d535a |
exit(1);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
if ( pivmax == 0.0 ) {
|
|
kusano |
7d535a |
if (diag != EMPTY)
|
|
kusano |
7d535a |
*pivrow = lsub_ptr[pivptr = diag];
|
|
kusano |
7d535a |
else if (ptr0 != EMPTY)
|
|
kusano |
7d535a |
*pivrow = lsub_ptr[pivptr = ptr0];
|
|
kusano |
7d535a |
else {
|
|
kusano |
7d535a |
/* look for the first row which does not
|
|
kusano |
7d535a |
belong to any later supernodes */
|
|
kusano |
7d535a |
for (icol = jcol; icol < n; icol++)
|
|
kusano |
7d535a |
if (marker[swap[icol]] <= jcol) break;
|
|
kusano |
7d535a |
if (icol >= n) {
|
|
kusano |
7d535a |
fprintf(stderr, "[1]: jcol=%d, SINGULAR!!!\n", jcol);
|
|
kusano |
7d535a |
fflush(stderr);
|
|
kusano |
7d535a |
exit(1);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
*pivrow = swap[icol];
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* pick up the pivot row */
|
|
kusano |
7d535a |
for (isub = nsupc; isub < nsupr; ++isub)
|
|
kusano |
7d535a |
if ( lsub_ptr[isub] == *pivrow ) { pivptr = isub; break; }
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
pivmax = fill_tol;
|
|
kusano |
7d535a |
lu_col_ptr[pivptr].r = pivmax;
|
|
kusano |
7d535a |
lu_col_ptr[pivptr].i = 0.0;
|
|
kusano |
7d535a |
*usepr = 0;
|
|
kusano |
7d535a |
#ifdef DEBUG
|
|
kusano |
7d535a |
printf("[0] ZERO PIVOT: FILL (%d, %d).\n", *pivrow, jcol);
|
|
kusano |
7d535a |
fflush(stdout);
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
info =jcol + 1;
|
|
kusano |
7d535a |
} /* if (*pivrow == 0.0) */
|
|
kusano |
7d535a |
else {
|
|
kusano |
7d535a |
thresh = u * pivmax;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Choose appropriate pivotal element by our policy. */
|
|
kusano |
7d535a |
if ( *usepr ) {
|
|
kusano |
7d535a |
switch (milu) {
|
|
kusano |
7d535a |
case SMILU_1:
|
|
kusano |
7d535a |
z_add(&temp, &lu_col_ptr[old_pivptr], &drop_sum);
|
|
kusano |
7d535a |
rtemp = z_abs1(&temp);
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case SMILU_2:
|
|
kusano |
7d535a |
case SMILU_3:
|
|
kusano |
7d535a |
rtemp = z_abs1(&lu_col_ptr[old_pivptr]) + drop_sum.r;
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case SILU:
|
|
kusano |
7d535a |
default:
|
|
kusano |
7d535a |
rtemp = z_abs1(&lu_col_ptr[old_pivptr]);
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = old_pivptr;
|
|
kusano |
7d535a |
else *usepr = 0;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
if ( *usepr == 0 ) {
|
|
kusano |
7d535a |
/* Use diagonal pivot? */
|
|
kusano |
7d535a |
if ( diag >= 0 ) { /* diagonal exists */
|
|
kusano |
7d535a |
switch (milu) {
|
|
kusano |
7d535a |
case SMILU_1:
|
|
kusano |
7d535a |
z_add(&temp, &lu_col_ptr[diag], &drop_sum);
|
|
kusano |
7d535a |
rtemp = z_abs1(&temp);
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case SMILU_2:
|
|
kusano |
7d535a |
case SMILU_3:
|
|
kusano |
7d535a |
rtemp = z_abs1(&lu_col_ptr[diag]) + drop_sum.r;
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case SILU:
|
|
kusano |
7d535a |
default:
|
|
kusano |
7d535a |
rtemp = z_abs1(&lu_col_ptr[diag]);
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = diag;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
*pivrow = lsub_ptr[pivptr];
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
info = 0;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Reset the diagonal */
|
|
kusano |
7d535a |
switch (milu) {
|
|
kusano |
7d535a |
case SMILU_1:
|
|
kusano |
7d535a |
z_add(&lu_col_ptr[pivptr], &lu_col_ptr[pivptr], &drop_sum);
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case SMILU_2:
|
|
kusano |
7d535a |
case SMILU_3:
|
|
kusano |
7d535a |
temp = z_sgn(&lu_col_ptr[pivptr]);
|
|
kusano |
7d535a |
zz_mult(&temp, &temp, &drop_sum);
|
|
kusano |
7d535a |
z_add(&lu_col_ptr[pivptr], &lu_col_ptr[pivptr], &drop_sum);
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
case SILU:
|
|
kusano |
7d535a |
default:
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
} /* else */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Record pivot row */
|
|
kusano |
7d535a |
perm_r[*pivrow] = jcol;
|
|
kusano |
7d535a |
if (jcol < n - 1) {
|
|
kusano |
7d535a |
register int t1, t2, t;
|
|
kusano |
7d535a |
t1 = iswap[*pivrow]; t2 = jcol;
|
|
kusano |
7d535a |
if (t1 != t2) {
|
|
kusano |
7d535a |
t = swap[t1]; swap[t1] = swap[t2]; swap[t2] = t;
|
|
kusano |
7d535a |
t1 = swap[t1]; t2 = t;
|
|
kusano |
7d535a |
t = iswap[t1]; iswap[t1] = iswap[t2]; iswap[t2] = t;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
} /* if (jcol < n - 1) */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Interchange row subscripts */
|
|
kusano |
7d535a |
if ( pivptr != nsupc ) {
|
|
kusano |
7d535a |
itemp = lsub_ptr[pivptr];
|
|
kusano |
7d535a |
lsub_ptr[pivptr] = lsub_ptr[nsupc];
|
|
kusano |
7d535a |
lsub_ptr[nsupc] = itemp;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Interchange numerical values as well, for the whole snode, such
|
|
kusano |
7d535a |
* that L is indexed the same way as A.
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
for (icol = 0; icol <= nsupc; icol++) {
|
|
kusano |
7d535a |
itemp = pivptr + icol * nsupr;
|
|
kusano |
7d535a |
temp = lu_sup_ptr[itemp];
|
|
kusano |
7d535a |
lu_sup_ptr[itemp] = lu_sup_ptr[nsupc + icol*nsupr];
|
|
kusano |
7d535a |
lu_sup_ptr[nsupc + icol*nsupr] = temp;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
} /* if */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* cdiv operation */
|
|
kusano |
7d535a |
ops[FACT] += 10 * (nsupr - nsupc);
|
|
kusano |
7d535a |
z_div(&temp, &one, &lu_col_ptr[nsupc]);
|
|
kusano |
7d535a |
for (k = nsupc+1; k < nsupr; k++)
|
|
kusano |
7d535a |
zz_mult(&lu_col_ptr[k], &lu_col_ptr[k], &temp);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
return info;
|
|
kusano |
7d535a |
}
|