|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! @file cfgmr.c
|
|
kusano |
7d535a |
* \brief flexible GMRES from ITSOL developed by Yousef Saad.
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* ITSOL COPYRIGHT
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
Copyright (C) 2006, the University of Minnesota
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
ITSOL is free software; you can redistribute it and/or modify it under
|
|
kusano |
7d535a |
the terms of the GNU General Public License as published by the Free
|
|
kusano |
7d535a |
Software Foundation [version 2 of the License, or any later version]
|
|
kusano |
7d535a |
For details, see
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
http://www.gnu.org/copyleft/gpl.html
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
A copy of the GNU licencing agreement is attached to the ITSOL package
|
|
kusano |
7d535a |
in the file GNU. For additional information contact the Free Software
|
|
kusano |
7d535a |
Foundation Inc., 65 Mass Ave, Cambridge, MA 02139, USA.
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
DISCLAIMER
|
|
kusano |
7d535a |
----------
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
This program is distributed in the hope that it will be useful, but
|
|
kusano |
7d535a |
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
kusano |
7d535a |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
kusano |
7d535a |
General Public License for more details.
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
For information on ITSOL contact saad@cs.umn.edu
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
#include "slu_cdefs.h"
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
#define epsmac 1.0e-16
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
extern void cdotc_(complex *, int *, complex [], int *, complex [], int *);
|
|
kusano |
7d535a |
extern float scnrm2_(int *, complex [], int *);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
int cfgmr(int n,
|
|
kusano |
7d535a |
void (*cmatvec) (complex, complex[], complex, complex[]),
|
|
kusano |
7d535a |
void (*cpsolve) (int, complex[], complex[]),
|
|
kusano |
7d535a |
complex *rhs, complex *sol, double tol, int im, int *itmax, FILE * fits)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
/*----------------------------------------------------------------------
|
|
kusano |
7d535a |
| *** Preconditioned FGMRES ***
|
|
kusano |
7d535a |
+-----------------------------------------------------------------------
|
|
kusano |
7d535a |
| This is a simple version of the ARMS preconditioned FGMRES algorithm.
|
|
kusano |
7d535a |
+-----------------------------------------------------------------------
|
|
kusano |
7d535a |
| Y. S. Dec. 2000. -- Apr. 2008
|
|
kusano |
7d535a |
+-----------------------------------------------------------------------
|
|
kusano |
7d535a |
| on entry:
|
|
kusano |
7d535a |
|----------
|
|
kusano |
7d535a |
|
|
|
kusano |
7d535a |
| rhs = real vector of length n containing the right hand side.
|
|
kusano |
7d535a |
| sol = real vector of length n containing an initial guess to the
|
|
kusano |
7d535a |
| solution on input.
|
|
kusano |
7d535a |
| tol = tolerance for stopping iteration
|
|
kusano |
7d535a |
| im = Krylov subspace dimension
|
|
kusano |
7d535a |
| (itmax) = max number of iterations allowed.
|
|
kusano |
7d535a |
| fits = NULL: no output
|
|
kusano |
7d535a |
| != NULL: file handle to output " resid vs time and its"
|
|
kusano |
7d535a |
|
|
|
kusano |
7d535a |
| on return:
|
|
kusano |
7d535a |
|----------
|
|
kusano |
7d535a |
| fgmr int = 0 --> successful return.
|
|
kusano |
7d535a |
| int = 1 --> convergence not achieved in itmax iterations.
|
|
kusano |
7d535a |
| sol = contains an approximate solution (upon successful return).
|
|
kusano |
7d535a |
| itmax = has changed. It now contains the number of steps required
|
|
kusano |
7d535a |
| to converge --
|
|
kusano |
7d535a |
+-----------------------------------------------------------------------
|
|
kusano |
7d535a |
| internal work arrays:
|
|
kusano |
7d535a |
|----------
|
|
kusano |
7d535a |
| vv = work array of length [im+1][n] (used to store the Arnoldi
|
|
kusano |
7d535a |
| basis)
|
|
kusano |
7d535a |
| hh = work array of length [im][im+1] (Householder matrix)
|
|
kusano |
7d535a |
| z = work array of length [im][n] to store preconditioned vectors
|
|
kusano |
7d535a |
+-----------------------------------------------------------------------
|
|
kusano |
7d535a |
| subroutines called :
|
|
kusano |
7d535a |
| matvec - matrix-vector multiplication operation
|
|
kusano |
7d535a |
| psolve - (right) preconditionning operation
|
|
kusano |
7d535a |
| psolve can be a NULL pointer (GMRES without preconditioner)
|
|
kusano |
7d535a |
+---------------------------------------------------------------------*/
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
int maxits = *itmax;
|
|
kusano |
7d535a |
int i, i1, ii, j, k, k1, its, retval, i_1 = 1, i_2 = 2;
|
|
kusano |
7d535a |
float beta, eps1 = 0.0, t, t0, gam;
|
|
kusano |
7d535a |
complex **hh, *c, *s, *rs;
|
|
kusano |
7d535a |
complex **vv, **z, tt;
|
|
kusano |
7d535a |
complex zero = {0.0, 0.0};
|
|
kusano |
7d535a |
complex one = {1.0, 0.0};
|
|
kusano |
7d535a |
complex tt1, tt2;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
its = 0;
|
|
kusano |
7d535a |
vv = (complex **)SUPERLU_MALLOC((im + 1) * sizeof(complex *));
|
|
kusano |
7d535a |
for (i = 0; i <= im; i++) vv[i] = complexMalloc(n);
|
|
kusano |
7d535a |
z = (complex **)SUPERLU_MALLOC(im * sizeof(complex *));
|
|
kusano |
7d535a |
hh = (complex **)SUPERLU_MALLOC(im * sizeof(complex *));
|
|
kusano |
7d535a |
for (i = 0; i < im; i++)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
hh[i] = complexMalloc(i + 2);
|
|
kusano |
7d535a |
z[i] = complexMalloc(n);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
c = complexMalloc(im);
|
|
kusano |
7d535a |
s = complexMalloc(im);
|
|
kusano |
7d535a |
rs = complexMalloc(im + 1);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*---- outer loop starts here ----*/
|
|
kusano |
7d535a |
do
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
/*---- compute initial residual vector ----*/
|
|
kusano |
7d535a |
cmatvec(one, sol, zero, vv[0]);
|
|
kusano |
7d535a |
for (j = 0; j < n; j++)
|
|
kusano |
7d535a |
c_sub(&vv[0][j], &rhs[j], &vv[0][j]); /* vv[0]= initial residual */
|
|
kusano |
7d535a |
beta = scnrm2_(&n, vv[0], &i_1);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*---- print info if fits != null ----*/
|
|
kusano |
7d535a |
if (fits != NULL && its == 0)
|
|
kusano |
7d535a |
fprintf(fits, "%8d %10.2e\n", its, beta);
|
|
kusano |
7d535a |
/*if ( beta <= tol * dnrm2_(&n, rhs, &i_1) )*/
|
|
kusano |
7d535a |
if ( !(beta > tol * scnrm2_(&n, rhs, &i_1)) )
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
t = 1.0 / beta;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*---- normalize: vv[0] = vv[0] / beta ----*/
|
|
kusano |
7d535a |
for (j = 0; j < n; j++)
|
|
kusano |
7d535a |
cs_mult(&vv[0][j], &vv[0][j], t);
|
|
kusano |
7d535a |
if (its == 0)
|
|
kusano |
7d535a |
eps1 = tol * beta;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*---- initialize 1-st term of rhs of hessenberg system ----*/
|
|
kusano |
7d535a |
rs[0].r = beta;
|
|
kusano |
7d535a |
rs[0].i = 0.0;
|
|
kusano |
7d535a |
for (i = 0; i < im; i++)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
its++;
|
|
kusano |
7d535a |
i1 = i + 1;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*------------------------------------------------------------
|
|
kusano |
7d535a |
| (Right) Preconditioning Operation z_{j} = M^{-1} v_{j}
|
|
kusano |
7d535a |
+-----------------------------------------------------------*/
|
|
kusano |
7d535a |
if (cpsolve)
|
|
kusano |
7d535a |
cpsolve(n, z[i], vv[i]);
|
|
kusano |
7d535a |
else
|
|
kusano |
7d535a |
ccopy_(&n, vv[i], &i_1, z[i], &i_1);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*---- matvec operation w = A z_{j} = A M^{-1} v_{j} ----*/
|
|
kusano |
7d535a |
cmatvec(one, z[i], zero, vv[i1]);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*------------------------------------------------------------
|
|
kusano |
7d535a |
| modified gram - schmidt...
|
|
kusano |
7d535a |
| h_{i,j} = (w,v_{i})
|
|
kusano |
7d535a |
| w = w - h_{i,j} v_{i}
|
|
kusano |
7d535a |
+------------------------------------------------------------*/
|
|
kusano |
7d535a |
t0 = scnrm2_(&n, vv[i1], &i_1);
|
|
kusano |
7d535a |
for (j = 0; j <= i; j++)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
complex negt;
|
|
kusano |
7d535a |
#if 0
|
|
kusano |
7d535a |
cdotc_(&tt, &n, vv[j], &i_1, vv[i1], &i_1);
|
|
kusano |
7d535a |
#else
|
|
kusano |
7d535a |
tt = zero;
|
|
kusano |
7d535a |
for (k = 0; k < n; ++k) {
|
|
kusano |
7d535a |
cc_conj(&tt1, &vv[j][k]);
|
|
kusano |
7d535a |
cc_mult(&tt2, &tt1, &vv[i1][k]);
|
|
kusano |
7d535a |
c_add(&tt, &tt, &tt2);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
hh[i][j] = tt;
|
|
kusano |
7d535a |
negt.r = -tt.r;
|
|
kusano |
7d535a |
negt.i = -tt.i;
|
|
kusano |
7d535a |
caxpy_(&n, &negt, vv[j], &i_1, vv[i1], &i_1);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*---- h_{j+1,j} = ||w||_{2} ----*/
|
|
kusano |
7d535a |
t = scnrm2_(&n, vv[i1], &i_1);
|
|
kusano |
7d535a |
while (t < 0.5 * t0)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
t0 = t;
|
|
kusano |
7d535a |
for (j = 0; j <= i; j++)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
complex negt;
|
|
kusano |
7d535a |
#if 0
|
|
kusano |
7d535a |
cdotc_(&tt, &n, vv[j], &i_1, vv[i1], &i_1);
|
|
kusano |
7d535a |
#else
|
|
kusano |
7d535a |
tt = zero;
|
|
kusano |
7d535a |
for (k = 0; k < n; ++k) {
|
|
kusano |
7d535a |
cc_conj(&tt1, &vv[j][k]);
|
|
kusano |
7d535a |
cc_mult(&tt2, &tt1, &vv[i1][k]);
|
|
kusano |
7d535a |
c_add(&tt, &tt, &tt2);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
c_add(&hh[i][j], &hh[i][j], &tt);
|
|
kusano |
7d535a |
negt.r = -tt.r;
|
|
kusano |
7d535a |
negt.i = -tt.i;
|
|
kusano |
7d535a |
caxpy_(&n, &negt, vv[j], &i_1, vv[i1], &i_1);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
t = scnrm2_(&n, vv[i1], &i_1);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
hh[i][i1].r = t;
|
|
kusano |
7d535a |
hh[i][i1].i = 0.0;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if (t != 0.0)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
/*---- v_{j+1} = w / h_{j+1,j} ----*/
|
|
kusano |
7d535a |
t = 1.0 / t;
|
|
kusano |
7d535a |
for (k = 0; k < n; k++)
|
|
kusano |
7d535a |
cs_mult(&vv[i1][k], &vv[i1][k], t);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
/*---------------------------------------------------
|
|
kusano |
7d535a |
| done with modified gram schimdt and arnoldi step
|
|
kusano |
7d535a |
| now update factorization of hh
|
|
kusano |
7d535a |
+--------------------------------------------------*/
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*--------------------------------------------------------
|
|
kusano |
7d535a |
| perform previous transformations on i-th column of h
|
|
kusano |
7d535a |
+-------------------------------------------------------*/
|
|
kusano |
7d535a |
for (k = 1; k <= i; k++)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
k1 = k - 1;
|
|
kusano |
7d535a |
tt = hh[i][k1];
|
|
kusano |
7d535a |
cc_mult(&tt1, &c[k1], &tt);
|
|
kusano |
7d535a |
cc_mult(&tt2, &s[k1], &hh[i][k]);
|
|
kusano |
7d535a |
c_add(&hh[i][k1], &tt1, &tt2);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
cc_mult(&tt1, &s[k1], &tt);
|
|
kusano |
7d535a |
cc_mult(&tt2, &c[k1], &hh[i][k]);
|
|
kusano |
7d535a |
c_sub(&hh[i][k], &tt2, &tt1);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
gam = scnrm2_(&i_2, &hh[i][i], &i_1);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*---------------------------------------------------
|
|
kusano |
7d535a |
| if gamma is zero then any small value will do
|
|
kusano |
7d535a |
| affect only residual estimate
|
|
kusano |
7d535a |
+--------------------------------------------------*/
|
|
kusano |
7d535a |
/* if (gam == 0.0) gam = epsmac; */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*---- get next plane rotation ---*/
|
|
kusano |
7d535a |
if (gam == 0.0)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
c[i] = one;
|
|
kusano |
7d535a |
s[i] = zero;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
else
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
gam = 1.0 / gam;
|
|
kusano |
7d535a |
cs_mult(&c[i], &hh[i][i], gam);
|
|
kusano |
7d535a |
cs_mult(&s[i], &hh[i][i1], gam);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
cc_mult(&rs[i1], &s[i], &rs[i]);
|
|
kusano |
7d535a |
rs[i1].r = -rs[i1].r; rs[i1].i = -rs[i1].i;
|
|
kusano |
7d535a |
cc_mult(&rs[i], &c[i], &rs[i]);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*----------------------------------------------------
|
|
kusano |
7d535a |
| determine residual norm and test for convergence
|
|
kusano |
7d535a |
+---------------------------------------------------*/
|
|
kusano |
7d535a |
cc_mult(&tt1, &c[i], &hh[i][i]);
|
|
kusano |
7d535a |
cc_mult(&tt2, &s[i], &hh[i][i1]);
|
|
kusano |
7d535a |
c_add(&hh[i][i], &tt1, &tt2);
|
|
kusano |
7d535a |
beta = c_abs1(&rs[i1]);
|
|
kusano |
7d535a |
if (fits != NULL)
|
|
kusano |
7d535a |
fprintf(fits, "%8d %10.2e\n", its, beta);
|
|
kusano |
7d535a |
if (beta <= eps1 || its >= maxits)
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if (i == im) i--;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*---- now compute solution. 1st, solve upper triangular system ----*/
|
|
kusano |
7d535a |
c_div(&rs[i], &rs[i], &hh[i][i]);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
for (ii = 1; ii <= i; ii++)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
k = i - ii;
|
|
kusano |
7d535a |
k1 = k + 1;
|
|
kusano |
7d535a |
tt = rs[k];
|
|
kusano |
7d535a |
for (j = k1; j <= i; j++) {
|
|
kusano |
7d535a |
cc_mult(&tt1, &hh[j][k], &rs[j]);
|
|
kusano |
7d535a |
c_sub(&tt, &tt, &tt1);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
c_div(&rs[k], &tt, &hh[k][k]);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*---- linear combination of v[i]'s to get sol. ----*/
|
|
kusano |
7d535a |
for (j = 0; j <= i; j++)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
tt = rs[j];
|
|
kusano |
7d535a |
for (k = 0; k < n; k++) {
|
|
kusano |
7d535a |
cc_mult(&tt1, &tt, &z[j][k]);
|
|
kusano |
7d535a |
c_add(&sol[k], &sol[k], &tt1);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* calculate the residual and output */
|
|
kusano |
7d535a |
cmatvec(one, sol, zero, vv[0]);
|
|
kusano |
7d535a |
for (j = 0; j < n; j++)
|
|
kusano |
7d535a |
c_sub(&vv[0][j], &rhs[j], &vv[0][j]);/* vv[0]= initial residual */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*---- print info if fits != null ----*/
|
|
kusano |
7d535a |
beta = scnrm2_(&n, vv[0], &i_1);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*---- restart outer loop if needed ----*/
|
|
kusano |
7d535a |
/*if (beta >= eps1 / tol)*/
|
|
kusano |
7d535a |
if ( !(beta < eps1 / tol) )
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
its = maxits + 10;
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
if (beta <= eps1)
|
|
kusano |
7d535a |
break;
|
|
kusano |
7d535a |
} while(its < maxits);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
retval = (its >= maxits);
|
|
kusano |
7d535a |
for (i = 0; i <= im; i++)
|
|
kusano |
7d535a |
SUPERLU_FREE(vv[i]);
|
|
kusano |
7d535a |
SUPERLU_FREE(vv);
|
|
kusano |
7d535a |
for (i = 0; i < im; i++)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
SUPERLU_FREE(hh[i]);
|
|
kusano |
7d535a |
SUPERLU_FREE(z[i]);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
SUPERLU_FREE(hh);
|
|
kusano |
7d535a |
SUPERLU_FREE(z);
|
|
kusano |
7d535a |
SUPERLU_FREE(c);
|
|
kusano |
7d535a |
SUPERLU_FREE(s);
|
|
kusano |
7d535a |
SUPERLU_FREE(rs);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
*itmax = its;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
return retval;
|
|
kusano |
7d535a |
} /*----end of fgmr ----*/
|