#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "slu_ddefs.h"
#define epsmac 1.0e-16
extern double ddot_(int *, double [], int *, double [], int *);
extern double dnrm2_(int *, double [], int *);
extern void daxpy_(int *, double *, double [], int *, double [], int *);
extern double dcopy_(int *, double [], int *, double [], int *);
int fgmr(int n,
void (*matvec) (double, double[], double, double[]),
void (*psolve) (int, double[], double[]),
double *rhs, double *sol, double tol, int im, int *itmax, FILE * fits)
{
/*----------------------------------------------------------------------
| *** Preconditioned FGMRES ***
+-----------------------------------------------------------------------
| This is a simple version of the ARMS preconditioned FGMRES algorithm.
+-----------------------------------------------------------------------
| Y. S. Dec. 2000. -- Apr. 2008
+-----------------------------------------------------------------------
| on entry:
|----------
|
| rhs = real vector of length n containing the right hand side.
| sol = real vector of length n containing an initial guess to the
| solution on input.
| tol = tolerance for stopping iteration
| im = Krylov subspace dimension
| (itmax) = max number of iterations allowed.
| fits = NULL: no output
| != NULL: file handle to output " resid vs time and its"
|
| on return:
|----------
| fgmr int = 0 --> successful return.
| int = 1 --> convergence not achieved in itmax iterations.
| sol = contains an approximate solution (upon successful return).
| itmax = has changed. It now contains the number of steps required
| to converge --
+-----------------------------------------------------------------------
| internal work arrays:
|----------
| vv = work array of length [im+1][n] (used to store the Arnoldi
| basis)
| hh = work array of length [im][im+1] (Householder matrix)
| z = work array of length [im][n] to store preconditioned vectors
+-----------------------------------------------------------------------
| subroutines called :
| matvec - matrix-vector multiplication operation
| psolve - (right) preconditionning operation
| psolve can be a NULL pointer (GMRES without preconditioner)
+---------------------------------------------------------------------*/
int maxits = *itmax;
int i, i1, ii, j, k, k1, its, retval, i_1 = 1;
double **hh, *c, *s, *rs, t, t0;
double beta, eps1 = 0.0, gam, **vv, **z;
its = 0;
vv = (double **)SUPERLU_MALLOC((im + 1) * sizeof(double *));
for (i = 0; i <= im; i++)
vv[i] = doubleMalloc(n);
z = (double **)SUPERLU_MALLOC(im * sizeof(double *));
hh = (double **)SUPERLU_MALLOC(im * sizeof(double *));
for (i = 0; i < im; i++)
{
hh[i] = doubleMalloc(i + 2);
z[i] = doubleMalloc(n);
}
c = doubleMalloc(im);
s = doubleMalloc(im);
rs = doubleMalloc(im + 1);
/*---- outer loop starts here ----*/
do
{
/*---- compute initial residual vector ----*/
matvec(1.0, sol, 0.0, vv[0]);
for (j = 0; j < n; j++)
vv[0][j] = rhs[j] - vv[0][j]; /* vv[0]= initial residual */
beta = dnrm2_(&n, vv[0], &i_1);
/*---- print info if fits != null ----*/
if (fits != NULL && its == 0)
fprintf(fits, "%8d %10.2e\n", its, beta);
/*if ( beta < tol * dnrm2_(&n, rhs, &i_1) )*/
if ( !(beta >= tol * dnrm2_(&n, rhs, &i_1)) )
break;
t = 1.0 / beta;
/*---- normalize: vv[0] = vv[0] / beta ----*/
for (j = 0; j < n; j++)
vv[0][j] = vv[0][j] * t;
if (its == 0)
eps1 = tol * beta;
/*---- initialize 1-st term of rhs of hessenberg system ----*/
rs[0] = beta;
for (i = 0; i < im; i++)
{
its++;
i1 = i + 1;
/*------------------------------------------------------------
| (Right) Preconditioning Operation z_{j} = M^{-1} v_{j}
+-----------------------------------------------------------*/
if (psolve)
psolve(n, z[i], vv[i]);
else
dcopy_(&n, vv[i], &i_1, z[i], &i_1);
/*---- matvec operation w = A z_{j} = A M^{-1} v_{j} ----*/
matvec(1.0, z[i], 0.0, vv[i1]);
/*------------------------------------------------------------
| modified gram - schmidt...
| h_{i,j} = (w,v_{i})
| w = w - h_{i,j} v_{i}
+------------------------------------------------------------*/
t0 = dnrm2_(&n, vv[i1], &i_1);
for (j = 0; j <= i; j++)
{
double negt;
t = ddot_(&n, vv[j], &i_1, vv[i1], &i_1);
hh[i][j] = t;
negt = -t;
daxpy_(&n, &negt, vv[j], &i_1, vv[i1], &i_1);
}
/*---- h_{j+1,j} = ||w||_{2} ----*/
t = dnrm2_(&n, vv[i1], &i_1);
while (t < 0.5 * t0)
{
t0 = t;
for (j = 0; j <= i; j++)
{
double negt;
t = ddot_(&n, vv[j], &i_1, vv[i1], &i_1);
hh[i][j] += t;
negt = -t;
daxpy_(&n, &negt, vv[j], &i_1, vv[i1], &i_1);
}
t = dnrm2_(&n, vv[i1], &i_1);
}
hh[i][i1] = t;
if (t != 0.0)
{
/*---- v_{j+1} = w / h_{j+1,j} ----*/
t = 1.0 / t;
for (k = 0; k < n; k++)
vv[i1][k] = vv[i1][k] * t;
}
/*---------------------------------------------------
| done with modified gram schimdt and arnoldi step
| now update factorization of hh
+--------------------------------------------------*/
/*--------------------------------------------------------
| perform previous transformations on i-th column of h
+-------------------------------------------------------*/
for (k = 1; k <= i; k++)
{
k1 = k - 1;
t = hh[i][k1];
hh[i][k1] = c[k1] * t + s[k1] * hh[i][k];
hh[i][k] = -s[k1] * t + c[k1] * hh[i][k];
}
gam = sqrt(pow(hh[i][i], 2) + pow(hh[i][i1], 2));
/*---------------------------------------------------
| if gamma is zero then any small value will do
| affect only residual estimate
+--------------------------------------------------*/
/* if (gam == 0.0) gam = epsmac; */
/*---- get next plane rotation ---*/
if (gam > 0.0)
{
c[i] = hh[i][i] / gam;
s[i] = hh[i][i1] / gam;
}
else
{
c[i] = 1.0;
s[i] = 0.0;
}
rs[i1] = -s[i] * rs[i];
rs[i] = c[i] * rs[i];
/*----------------------------------------------------
| determine residual norm and test for convergence
+---------------------------------------------------*/
hh[i][i] = c[i] * hh[i][i] + s[i] * hh[i][i1];
beta = fabs(rs[i1]);
if (fits != NULL)
fprintf(fits, "%8d %10.2e\n", its, beta);
if (beta <= eps1 || its >= maxits)
break;
}
if (i == im) i--;
/*---- now compute solution. 1st, solve upper triangular system ----*/
rs[i] = rs[i] / hh[i][i];
for (ii = 1; ii <= i; ii++)
{
k = i - ii;
k1 = k + 1;
t = rs[k];
for (j = k1; j <= i; j++)
t = t - hh[j][k] * rs[j];
rs[k] = t / hh[k][k];
}
/*---- linear combination of v[i]'s to get sol. ----*/
for (j = 0; j <= i; j++)
{
t = rs[j];
for (k = 0; k < n; k++)
sol[k] += t * z[j][k];
}
/* calculate the residual and output */
matvec(1.0, sol, 0.0, vv[0]);
for (j = 0; j < n; j++)
vv[0][j] = rhs[j] - vv[0][j]; /* vv[0]= initial residual */
/*---- print info if fits != null ----*/
beta = dnrm2_(&n, vv[0], &i_1);
/*---- restart outer loop if needed ----*/
/*if (beta >= eps1 / tol)*/
if ( !(beta < eps1 / tol) )
{
its = maxits + 10;
break;
}
if (beta <= eps1)
break;
} while(its < maxits);
retval = (its >= maxits);
for (i = 0; i <= im; i++)
SUPERLU_FREE(vv[i]);
SUPERLU_FREE(vv);
for (i = 0; i < im; i++)
{
SUPERLU_FREE(hh[i]);
SUPERLU_FREE(z[i]);
}
SUPERLU_FREE(hh);
SUPERLU_FREE(z);
SUPERLU_FREE(c);
SUPERLU_FREE(s);
SUPERLU_FREE(rs);
*itmax = its;
return retval;
} /*----end of fgmr ----*/