|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*
|
|
kusano |
7d535a |
* -- SuperLU routine (version 3.0) --
|
|
kusano |
7d535a |
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
|
|
kusano |
7d535a |
* and Lawrence Berkeley National Lab.
|
|
kusano |
7d535a |
* October 15, 2003
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
#include <math.h></math.h>
|
|
kusano |
7d535a |
#include "slu_cdefs.h"
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
int cgst07(trans_t trans, int n, int nrhs, SuperMatrix *A, complex *b,
|
|
kusano |
7d535a |
int ldb, complex *x, int ldx, complex *xact,
|
|
kusano |
7d535a |
int ldxact, float *ferr, float *berr, float *reslts)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
/*
|
|
kusano |
7d535a |
Purpose
|
|
kusano |
7d535a |
=======
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
CGST07 tests the error bounds from iterative refinement for the
|
|
kusano |
7d535a |
computed solution to a system of equations op(A)*X = B, where A is a
|
|
kusano |
7d535a |
general n by n matrix and op(A) = A or A**T, depending on TRANS.
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
RESLTS(1) = test of the error bound
|
|
kusano |
7d535a |
= norm(X - XACT) / ( norm(X) * FERR )
|
|
kusano |
7d535a |
A large value is returned if this ratio is not less than one.
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
RESLTS(2) = residual from the iterative refinement routine
|
|
kusano |
7d535a |
= the maximum of BERR / ( (n+1)*EPS + (*) ), where
|
|
kusano |
7d535a |
(*) = (n+1)*UNFL / (min_i (abs(op(A))*abs(X) +abs(b))_i )
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
Arguments
|
|
kusano |
7d535a |
=========
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
TRANS (input) trans_t
|
|
kusano |
7d535a |
Specifies the form of the system of equations.
|
|
kusano |
7d535a |
= NOTRANS: A *x = b
|
|
kusano |
7d535a |
= TRANS : A'*x = b, where A' is the transpose of A
|
|
kusano |
7d535a |
= CONJ : A'*x = b, where A' is the transpose of A
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
N (input) INT
|
|
kusano |
7d535a |
The number of rows of the matrices X and XACT. N >= 0.
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
NRHS (input) INT
|
|
kusano |
7d535a |
The number of columns of the matrices X and XACT. NRHS >= 0.
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
A (input) SuperMatrix *, dimension (A->nrow, A->ncol)
|
|
kusano |
7d535a |
The original n by n matrix A.
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
B (input) COMPLEX PRECISION array, dimension (LDB,NRHS)
|
|
kusano |
7d535a |
The right hand side vectors for the system of linear
|
|
kusano |
7d535a |
equations.
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
LDB (input) INT
|
|
kusano |
7d535a |
The leading dimension of the array B. LDB >= max(1,N).
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
X (input) COMPLEX PRECISION array, dimension (LDX,NRHS)
|
|
kusano |
7d535a |
The computed solution vectors. Each vector is stored as a
|
|
kusano |
7d535a |
column of the matrix X.
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
LDX (input) INT
|
|
kusano |
7d535a |
The leading dimension of the array X. LDX >= max(1,N).
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
XACT (input) COMPLEX PRECISION array, dimension (LDX,NRHS)
|
|
kusano |
7d535a |
The exact solution vectors. Each vector is stored as a
|
|
kusano |
7d535a |
column of the matrix XACT.
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
LDXACT (input) INT
|
|
kusano |
7d535a |
The leading dimension of the array XACT. LDXACT >= max(1,N).
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
FERR (input) COMPLEX PRECISION array, dimension (NRHS)
|
|
kusano |
7d535a |
The estimated forward error bounds for each solution vector
|
|
kusano |
7d535a |
X. If XTRUE is the true solution, FERR bounds the magnitude
|
|
kusano |
7d535a |
of the largest entry in (X - XTRUE) divided by the magnitude
|
|
kusano |
7d535a |
of the largest entry in X.
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
BERR (input) COMPLEX PRECISION array, dimension (NRHS)
|
|
kusano |
7d535a |
The componentwise relative backward error of each solution
|
|
kusano |
7d535a |
vector (i.e., the smallest relative change in any entry of A
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
or B that makes X an exact solution).
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
RESLTS (output) FLOAT PRECISION array, dimension (2)
|
|
kusano |
7d535a |
The maximum over the NRHS solution vectors of the ratios:
|
|
kusano |
7d535a |
RESLTS(1) = norm(X - XACT) / ( norm(X) * FERR )
|
|
kusano |
7d535a |
RESLTS(2) = BERR / ( (n+1)*EPS + (*) )
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
=====================================================================
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Table of constant values */
|
|
kusano |
7d535a |
int c__1 = 1;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* System generated locals */
|
|
kusano |
7d535a |
float d__1, d__2;
|
|
kusano |
7d535a |
float d__3, d__4;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Local variables */
|
|
kusano |
7d535a |
float diff, axbi;
|
|
kusano |
7d535a |
int imax, irow, n__1;
|
|
kusano |
7d535a |
int i, j, k;
|
|
kusano |
7d535a |
float unfl, ovfl;
|
|
kusano |
7d535a |
float xnorm;
|
|
kusano |
7d535a |
float errbnd;
|
|
kusano |
7d535a |
int notran;
|
|
kusano |
7d535a |
float eps, tmp;
|
|
kusano |
7d535a |
float *rwork;
|
|
kusano |
7d535a |
complex *Aval;
|
|
kusano |
7d535a |
NCformat *Astore;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Function prototypes */
|
|
kusano |
7d535a |
extern int lsame_(char *, char *);
|
|
kusano |
7d535a |
extern int icamax_(int *, complex *, int *);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Quick exit if N = 0 or NRHS = 0. */
|
|
kusano |
7d535a |
if ( n <= 0 || nrhs <= 0 ) {
|
|
kusano |
7d535a |
reslts[0] = 0.;
|
|
kusano |
7d535a |
reslts[1] = 0.;
|
|
kusano |
7d535a |
return 0;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
eps = slamch_("Epsilon");
|
|
kusano |
7d535a |
unfl = slamch_("Safe minimum");
|
|
kusano |
7d535a |
ovfl = 1. / unfl;
|
|
kusano |
7d535a |
notran = (trans == NOTRANS);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
rwork = (float *) SUPERLU_MALLOC(n*sizeof(float));
|
|
kusano |
7d535a |
if ( !rwork ) ABORT("SUPERLU_MALLOC fails for rwork");
|
|
kusano |
7d535a |
Astore = A->Store;
|
|
kusano |
7d535a |
Aval = (complex *) Astore->nzval;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Test 1: Compute the maximum of
|
|
kusano |
7d535a |
norm(X - XACT) / ( norm(X) * FERR )
|
|
kusano |
7d535a |
over all the vectors X and XACT using the infinity-norm. */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
errbnd = 0.;
|
|
kusano |
7d535a |
for (j = 0; j < nrhs; ++j) {
|
|
kusano |
7d535a |
n__1 = n;
|
|
kusano |
7d535a |
imax = icamax_(&n__1, &x[j*ldx], &c__1);
|
|
kusano |
7d535a |
d__1 = (d__2 = x[imax-1 + j*ldx].r, fabs(d__2)) +
|
|
kusano |
7d535a |
(d__3 = x[imax-1 + j*ldx].i, fabs(d__3));
|
|
kusano |
7d535a |
xnorm = SUPERLU_MAX(d__1,unfl);
|
|
kusano |
7d535a |
diff = 0.;
|
|
kusano |
7d535a |
for (i = 0; i < n; ++i) {
|
|
kusano |
7d535a |
d__1 = (d__2 = x[i+j*ldx].r - xact[i+j*ldxact].r, fabs(d__2)) +
|
|
kusano |
7d535a |
(d__3 = x[i+j*ldx].i - xact[i+j*ldxact].i, fabs(d__3));
|
|
kusano |
7d535a |
diff = SUPERLU_MAX(diff, d__1);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if (xnorm > 1.) {
|
|
kusano |
7d535a |
goto L20;
|
|
kusano |
7d535a |
} else if (diff <= ovfl * xnorm) {
|
|
kusano |
7d535a |
goto L20;
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
errbnd = 1. / eps;
|
|
kusano |
7d535a |
goto L30;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
L20:
|
|
kusano |
7d535a |
#if 0
|
|
kusano |
7d535a |
if (diff / xnorm <= ferr[j]) {
|
|
kusano |
7d535a |
d__1 = diff / xnorm / ferr[j];
|
|
kusano |
7d535a |
errbnd = SUPERLU_MAX(errbnd,d__1);
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
errbnd = 1. / eps;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
d__1 = diff / xnorm / ferr[j];
|
|
kusano |
7d535a |
errbnd = SUPERLU_MAX(errbnd,d__1);
|
|
kusano |
7d535a |
/*printf("Ferr: %f\n", errbnd);*/
|
|
kusano |
7d535a |
L30:
|
|
kusano |
7d535a |
;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
reslts[0] = errbnd;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Test 2: Compute the maximum of BERR / ( (n+1)*EPS + (*) ), where
|
|
kusano |
7d535a |
(*) = (n+1)*UNFL / (min_i (abs(op(A))*abs(X) + abs(b))_i ) */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
for (k = 0; k < nrhs; ++k) {
|
|
kusano |
7d535a |
for (i = 0; i < n; ++i)
|
|
kusano |
7d535a |
rwork[i] = (d__1 = b[i + k*ldb].r, fabs(d__1)) +
|
|
kusano |
7d535a |
(d__2 = b[i + k*ldb].i, fabs(d__2));
|
|
kusano |
7d535a |
if ( notran ) {
|
|
kusano |
7d535a |
for (j = 0; j < n; ++j) {
|
|
kusano |
7d535a |
tmp = (d__1 = x[j + k*ldx].r, fabs(d__1)) +
|
|
kusano |
7d535a |
(d__2 = x[j + k*ldx].i, fabs(d__2));
|
|
kusano |
7d535a |
for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
|
|
kusano |
7d535a |
d__1 = (d__2 = Aval[i].r, fabs(d__2)) +
|
|
kusano |
7d535a |
(d__3 = Aval[i].i, fabs(d__3));
|
|
kusano |
7d535a |
rwork[Astore->rowind[i]] += d__1 * tmp;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
for (j = 0; j < n; ++j) {
|
|
kusano |
7d535a |
tmp = 0.;
|
|
kusano |
7d535a |
for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
|
|
kusano |
7d535a |
irow = Astore->rowind[i];
|
|
kusano |
7d535a |
d__1 = (d__2 = x[irow + k*ldx].r, fabs(d__2)) +
|
|
kusano |
7d535a |
(d__3 = x[irow + k*ldx].i, fabs(d__3));
|
|
kusano |
7d535a |
d__2 = (d__3 = Aval[i].r, fabs(d__3)) +
|
|
kusano |
7d535a |
(d__4 = Aval[i].i, fabs(d__4));
|
|
kusano |
7d535a |
tmp += d__2 * d__1;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
rwork[j] += tmp;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
axbi = rwork[0];
|
|
kusano |
7d535a |
for (i = 1; i < n; ++i) axbi = SUPERLU_MIN(axbi, rwork[i]);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Computing MAX */
|
|
kusano |
7d535a |
d__1 = axbi, d__2 = (n + 1) * unfl;
|
|
kusano |
7d535a |
tmp = berr[k] / ((n + 1) * eps + (n + 1) * unfl / SUPERLU_MAX(d__1,d__2));
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if (k == 0) {
|
|
kusano |
7d535a |
reslts[1] = tmp;
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
reslts[1] = SUPERLU_MAX(reslts[1],tmp);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
SUPERLU_FREE(rwork);
|
|
kusano |
7d535a |
return 0;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
} /* cgst07 */
|