kusano 7d535a
kusano 7d535a
/*! @file zfgmr.c
kusano 7d535a
 * \brief flexible GMRES from ITSOL developed by Yousef Saad.
kusano 7d535a
 */
kusano 7d535a
kusano 7d535a
/* ITSOL COPYRIGHT
kusano 7d535a
kusano 7d535a
Copyright (C) 2006, the University of Minnesota 
kusano 7d535a
kusano 7d535a
ITSOL is free software; you can redistribute it and/or modify it under
kusano 7d535a
the terms of  the GNU General Public License as  published by the Free
kusano 7d535a
Software Foundation [version 2 of  the License, or any later version]
kusano 7d535a
For details, see 
kusano 7d535a
kusano 7d535a
http://www.gnu.org/copyleft/gpl.html
kusano 7d535a
kusano 7d535a
A copy of the GNU licencing agreement is attached to the ITSOL package
kusano 7d535a
in the file GNU.  For additional information contact the Free Software
kusano 7d535a
Foundation Inc., 65 Mass Ave, Cambridge, MA 02139, USA.
kusano 7d535a
kusano 7d535a
DISCLAIMER
kusano 7d535a
----------
kusano 7d535a
kusano 7d535a
This program  is distributed in the  hope that it will  be useful, but
kusano 7d535a
WITHOUT   ANY  WARRANTY;   without  even   the  implied   warranty  of
kusano 7d535a
MERCHANTABILITY  or FITNESS  FOR A  PARTICULAR PURPOSE.   See  the GNU
kusano 7d535a
General Public License for more details. 
kusano 7d535a
kusano 7d535a
For information on ITSOL contact saad@cs.umn.edu
kusano 7d535a
*/
kusano 7d535a
kusano 7d535a
#include "slu_zdefs.h"
kusano 7d535a
kusano 7d535a
#define  epsmac  1.0e-16
kusano 7d535a
kusano 7d535a
extern void zdotc_(doublecomplex *, int *, doublecomplex [], int *, doublecomplex [], int *);
kusano 7d535a
extern double dznrm2_(int *, doublecomplex [], int *);
kusano 7d535a
kusano 7d535a
kusano 7d535a
int zfgmr(int n,
kusano 7d535a
     void (*zmatvec) (doublecomplex, doublecomplex[], doublecomplex, doublecomplex[]),
kusano 7d535a
     void (*zpsolve) (int, doublecomplex[], doublecomplex[]),
kusano 7d535a
     doublecomplex *rhs, doublecomplex *sol, double tol, int im, int *itmax, FILE * fits)
kusano 7d535a
{
kusano 7d535a
/*----------------------------------------------------------------------
kusano 7d535a
|                 *** Preconditioned FGMRES ***
kusano 7d535a
+-----------------------------------------------------------------------
kusano 7d535a
| This is a simple version of the ARMS preconditioned FGMRES algorithm.
kusano 7d535a
+-----------------------------------------------------------------------
kusano 7d535a
| Y. S. Dec. 2000. -- Apr. 2008
kusano 7d535a
+-----------------------------------------------------------------------
kusano 7d535a
| on entry:
kusano 7d535a
|----------
kusano 7d535a
|
kusano 7d535a
| rhs     = real vector of length n containing the right hand side.
kusano 7d535a
| sol     = real vector of length n containing an initial guess to the
kusano 7d535a
|           solution on input.
kusano 7d535a
| tol     = tolerance for stopping iteration
kusano 7d535a
| im      = Krylov subspace dimension
kusano 7d535a
| (itmax) = max number of iterations allowed.
kusano 7d535a
| fits    = NULL: no output
kusano 7d535a
|        != NULL: file handle to output " resid vs time and its"
kusano 7d535a
|
kusano 7d535a
| on return:
kusano 7d535a
|----------
kusano 7d535a
| fgmr      int =  0 --> successful return.
kusano 7d535a
|           int =  1 --> convergence not achieved in itmax iterations.
kusano 7d535a
| sol     = contains an approximate solution (upon successful return).
kusano 7d535a
| itmax   = has changed. It now contains the number of steps required
kusano 7d535a
|           to converge --
kusano 7d535a
+-----------------------------------------------------------------------
kusano 7d535a
| internal work arrays:
kusano 7d535a
|----------
kusano 7d535a
| vv      = work array of length [im+1][n] (used to store the Arnoldi
kusano 7d535a
|           basis)
kusano 7d535a
| hh      = work array of length [im][im+1] (Householder matrix)
kusano 7d535a
| z       = work array of length [im][n] to store preconditioned vectors
kusano 7d535a
+-----------------------------------------------------------------------
kusano 7d535a
| subroutines called :
kusano 7d535a
| matvec - matrix-vector multiplication operation
kusano 7d535a
| psolve - (right) preconditionning operation
kusano 7d535a
|	   psolve can be a NULL pointer (GMRES without preconditioner)
kusano 7d535a
+---------------------------------------------------------------------*/
kusano 7d535a
kusano 7d535a
    int maxits = *itmax;
kusano 7d535a
    int i, i1, ii, j, k, k1, its, retval, i_1 = 1, i_2 = 2;
kusano 7d535a
    double beta, eps1 = 0.0, t, t0, gam;
kusano 7d535a
    doublecomplex **hh, *c, *s, *rs;
kusano 7d535a
    doublecomplex **vv, **z, tt;
kusano 7d535a
    doublecomplex zero = {0.0, 0.0};
kusano 7d535a
    doublecomplex one = {1.0, 0.0};
kusano 7d535a
    doublecomplex tt1, tt2;
kusano 7d535a
kusano 7d535a
    its = 0;
kusano 7d535a
    vv = (doublecomplex **)SUPERLU_MALLOC((im + 1) * sizeof(doublecomplex *));
kusano 7d535a
    for (i = 0; i <= im; i++) vv[i] = doublecomplexMalloc(n);
kusano 7d535a
    z = (doublecomplex **)SUPERLU_MALLOC(im * sizeof(doublecomplex *));
kusano 7d535a
    hh = (doublecomplex **)SUPERLU_MALLOC(im * sizeof(doublecomplex *));
kusano 7d535a
    for (i = 0; i < im; i++)
kusano 7d535a
    {
kusano 7d535a
	hh[i] = doublecomplexMalloc(i + 2);
kusano 7d535a
	z[i] = doublecomplexMalloc(n);
kusano 7d535a
    }
kusano 7d535a
    c = doublecomplexMalloc(im);
kusano 7d535a
    s = doublecomplexMalloc(im);
kusano 7d535a
    rs = doublecomplexMalloc(im + 1);
kusano 7d535a
kusano 7d535a
    /*---- outer loop starts here ----*/
kusano 7d535a
    do
kusano 7d535a
    {
kusano 7d535a
	/*---- compute initial residual vector ----*/
kusano 7d535a
	zmatvec(one, sol, zero, vv[0]);
kusano 7d535a
	for (j = 0; j < n; j++)
kusano 7d535a
	    z_sub(&vv[0][j], &rhs[j], &vv[0][j]);	/* vv[0]= initial residual */
kusano 7d535a
	beta = dznrm2_(&n, vv[0], &i_1);
kusano 7d535a
kusano 7d535a
	/*---- print info if fits != null ----*/
kusano 7d535a
	if (fits != NULL && its == 0)
kusano 7d535a
	    fprintf(fits, "%8d   %10.2e\n", its, beta);
kusano 7d535a
	/*if ( beta <= tol * dnrm2_(&n, rhs, &i_1) )*/
kusano 7d535a
	if ( !(beta > tol * dznrm2_(&n, rhs, &i_1)) )
kusano 7d535a
	    break;
kusano 7d535a
	t = 1.0 / beta;
kusano 7d535a
kusano 7d535a
	/*---- normalize: vv[0] = vv[0] / beta ----*/
kusano 7d535a
	for (j = 0; j < n; j++)
kusano 7d535a
	    zd_mult(&vv[0][j], &vv[0][j], t);
kusano 7d535a
	if (its == 0)
kusano 7d535a
	    eps1 = tol * beta;
kusano 7d535a
kusano 7d535a
	/*---- initialize 1-st term of rhs of hessenberg system ----*/
kusano 7d535a
	rs[0].r = beta;
kusano 7d535a
	rs[0].i = 0.0;
kusano 7d535a
	for (i = 0; i < im; i++)
kusano 7d535a
	{
kusano 7d535a
	    its++;
kusano 7d535a
	    i1 = i + 1;
kusano 7d535a
kusano 7d535a
	    /*------------------------------------------------------------
kusano 7d535a
	    |  (Right) Preconditioning Operation   z_{j} = M^{-1} v_{j}
kusano 7d535a
	    +-----------------------------------------------------------*/
kusano 7d535a
	    if (zpsolve)
kusano 7d535a
		zpsolve(n, z[i], vv[i]);
kusano 7d535a
	    else
kusano 7d535a
		zcopy_(&n, vv[i], &i_1, z[i], &i_1);
kusano 7d535a
kusano 7d535a
	    /*---- matvec operation w = A z_{j} = A M^{-1} v_{j} ----*/
kusano 7d535a
	    zmatvec(one, z[i], zero, vv[i1]);
kusano 7d535a
kusano 7d535a
	    /*------------------------------------------------------------
kusano 7d535a
	    |     modified gram - schmidt...
kusano 7d535a
	    |     h_{i,j} = (w,v_{i})
kusano 7d535a
	    |     w  = w - h_{i,j} v_{i}
kusano 7d535a
	    +------------------------------------------------------------*/
kusano 7d535a
	    t0 = dznrm2_(&n, vv[i1], &i_1);
kusano 7d535a
	    for (j = 0; j <= i; j++)
kusano 7d535a
	    {
kusano 7d535a
		doublecomplex negt;
kusano 7d535a
#if 0
kusano 7d535a
		zdotc_(&tt, &n, vv[j], &i_1, vv[i1], &i_1);
kusano 7d535a
#else
kusano 7d535a
		tt = zero;
kusano 7d535a
		for (k = 0; k < n; ++k) {
kusano 7d535a
		    zz_conj(&tt1, &vv[j][k]);
kusano 7d535a
		    zz_mult(&tt2, &tt1, &vv[i1][k]);
kusano 7d535a
		    z_add(&tt, &tt, &tt2);
kusano 7d535a
		}
kusano 7d535a
#endif
kusano 7d535a
		hh[i][j] = tt;
kusano 7d535a
		negt.r = -tt.r;
kusano 7d535a
		negt.i = -tt.i;
kusano 7d535a
		zaxpy_(&n, &negt, vv[j], &i_1, vv[i1], &i_1);
kusano 7d535a
	    }
kusano 7d535a
kusano 7d535a
	    /*---- h_{j+1,j} = ||w||_{2} ----*/
kusano 7d535a
	    t = dznrm2_(&n, vv[i1], &i_1);
kusano 7d535a
	    while (t < 0.5 * t0)
kusano 7d535a
	    {
kusano 7d535a
		t0 = t;
kusano 7d535a
		for (j = 0; j <= i; j++)
kusano 7d535a
		{
kusano 7d535a
		    doublecomplex negt;
kusano 7d535a
#if 0
kusano 7d535a
		    zdotc_(&tt, &n, vv[j], &i_1, vv[i1], &i_1);
kusano 7d535a
#else
kusano 7d535a
   	            tt = zero;
kusano 7d535a
		    for (k = 0; k < n; ++k) {
kusano 7d535a
		        zz_conj(&tt1, &vv[j][k]);
kusano 7d535a
		        zz_mult(&tt2, &tt1, &vv[i1][k]);
kusano 7d535a
		        z_add(&tt, &tt, &tt2);
kusano 7d535a
		    }
kusano 7d535a
#endif
kusano 7d535a
		    z_add(&hh[i][j], &hh[i][j], &tt);
kusano 7d535a
		    negt.r = -tt.r;
kusano 7d535a
		    negt.i = -tt.i;
kusano 7d535a
		    zaxpy_(&n, &negt, vv[j], &i_1, vv[i1], &i_1);
kusano 7d535a
		}
kusano 7d535a
		t = dznrm2_(&n, vv[i1], &i_1);
kusano 7d535a
	    }
kusano 7d535a
kusano 7d535a
	    hh[i][i1].r = t;
kusano 7d535a
	    hh[i][i1].i = 0.0;
kusano 7d535a
kusano 7d535a
	    if (t != 0.0)
kusano 7d535a
	    {
kusano 7d535a
		/*---- v_{j+1} = w / h_{j+1,j} ----*/
kusano 7d535a
		t = 1.0 / t;
kusano 7d535a
		for (k = 0; k < n; k++)
kusano 7d535a
	            zd_mult(&vv[i1][k], &vv[i1][k], t);
kusano 7d535a
	    }
kusano 7d535a
	    /*---------------------------------------------------
kusano 7d535a
	    |     done with modified gram schimdt and arnoldi step
kusano 7d535a
	    |     now  update factorization of hh
kusano 7d535a
	    +--------------------------------------------------*/
kusano 7d535a
kusano 7d535a
	    /*--------------------------------------------------------
kusano 7d535a
	    |   perform previous transformations  on i-th column of h
kusano 7d535a
	    +-------------------------------------------------------*/
kusano 7d535a
	    for (k = 1; k <= i; k++)
kusano 7d535a
	    {
kusano 7d535a
		k1 = k - 1;
kusano 7d535a
		tt = hh[i][k1];
kusano 7d535a
                zz_mult(&tt1, &c[k1], &tt);
kusano 7d535a
                zz_mult(&tt2, &s[k1], &hh[i][k]);
kusano 7d535a
                z_add(&hh[i][k1], &tt1, &tt2);
kusano 7d535a
kusano 7d535a
                zz_mult(&tt1, &s[k1], &tt);
kusano 7d535a
                zz_mult(&tt2, &c[k1], &hh[i][k]);
kusano 7d535a
                z_sub(&hh[i][k], &tt2, &tt1);
kusano 7d535a
	    }
kusano 7d535a
kusano 7d535a
	    gam = dznrm2_(&i_2, &hh[i][i], &i_1);
kusano 7d535a
kusano 7d535a
	    /*---------------------------------------------------
kusano 7d535a
	    |     if gamma is zero then any small value will do
kusano 7d535a
	    |     affect only residual estimate
kusano 7d535a
	    +--------------------------------------------------*/
kusano 7d535a
	    /* if (gam == 0.0) gam = epsmac; */
kusano 7d535a
kusano 7d535a
	    /*---- get next plane rotation ---*/
kusano 7d535a
	    if (gam == 0.0)
kusano 7d535a
	    {
kusano 7d535a
		c[i] = one;
kusano 7d535a
		s[i] = zero;
kusano 7d535a
	    }
kusano 7d535a
            else
kusano 7d535a
	    {
kusano 7d535a
                gam = 1.0 / gam;
kusano 7d535a
		zd_mult(&c[i], &hh[i][i], gam);
kusano 7d535a
		zd_mult(&s[i], &hh[i][i1], gam);
kusano 7d535a
	    }
kusano 7d535a
kusano 7d535a
	    zz_mult(&rs[i1], &s[i], &rs[i]);
kusano 7d535a
            rs[i1].r = -rs[i1].r;  rs[i1].i = -rs[i1].i;
kusano 7d535a
	    zz_mult(&rs[i], &c[i], &rs[i]);
kusano 7d535a
kusano 7d535a
	    /*----------------------------------------------------
kusano 7d535a
	    |   determine residual norm and test for convergence
kusano 7d535a
	    +---------------------------------------------------*/
kusano 7d535a
            zz_mult(&tt1, &c[i], &hh[i][i]);
kusano 7d535a
            zz_mult(&tt2, &s[i], &hh[i][i1]);
kusano 7d535a
            z_add(&hh[i][i], &tt1, &tt2);
kusano 7d535a
            beta = z_abs1(&rs[i1]);
kusano 7d535a
	    if (fits != NULL)
kusano 7d535a
		fprintf(fits, "%8d   %10.2e\n", its, beta);
kusano 7d535a
	    if (beta <= eps1 || its >= maxits)
kusano 7d535a
		break;
kusano 7d535a
	}
kusano 7d535a
kusano 7d535a
	if (i == im) i--;
kusano 7d535a
kusano 7d535a
	/*---- now compute solution. 1st, solve upper triangular system ----*/
kusano 7d535a
	z_div(&rs[i], &rs[i], &hh[i][i]);
kusano 7d535a
kusano 7d535a
	for (ii = 1; ii <= i; ii++)
kusano 7d535a
	{
kusano 7d535a
	    k = i - ii;
kusano 7d535a
	    k1 = k + 1;
kusano 7d535a
	    tt = rs[k];
kusano 7d535a
	    for (j = k1; j <= i; j++) {
kusano 7d535a
                zz_mult(&tt1, &hh[j][k], &rs[j]);
kusano 7d535a
		z_sub(&tt, &tt, &tt1);
kusano 7d535a
            }
kusano 7d535a
            z_div(&rs[k], &tt, &hh[k][k]);
kusano 7d535a
	}
kusano 7d535a
kusano 7d535a
	/*---- linear combination of v[i]'s to get sol. ----*/
kusano 7d535a
	for (j = 0; j <= i; j++)
kusano 7d535a
	{
kusano 7d535a
	    tt = rs[j];
kusano 7d535a
	    for (k = 0; k < n; k++) {
kusano 7d535a
                zz_mult(&tt1, &tt, &z[j][k]);
kusano 7d535a
		z_add(&sol[k], &sol[k], &tt1);
kusano 7d535a
            }
kusano 7d535a
	}
kusano 7d535a
kusano 7d535a
	/* calculate the residual and output */
kusano 7d535a
	zmatvec(one, sol, zero, vv[0]);
kusano 7d535a
	for (j = 0; j < n; j++)
kusano 7d535a
	    z_sub(&vv[0][j], &rhs[j], &vv[0][j]);/* vv[0]= initial residual */
kusano 7d535a
kusano 7d535a
	/*---- print info if fits != null ----*/
kusano 7d535a
	beta = dznrm2_(&n, vv[0], &i_1);
kusano 7d535a
kusano 7d535a
	/*---- restart outer loop if needed ----*/
kusano 7d535a
	/*if (beta >= eps1 / tol)*/
kusano 7d535a
	if ( !(beta < eps1 / tol) )
kusano 7d535a
	{
kusano 7d535a
	    its = maxits + 10;
kusano 7d535a
	    break;
kusano 7d535a
	}
kusano 7d535a
	if (beta <= eps1)
kusano 7d535a
	    break;
kusano 7d535a
    } while(its < maxits);
kusano 7d535a
kusano 7d535a
    retval = (its >= maxits);
kusano 7d535a
    for (i = 0; i <= im; i++)
kusano 7d535a
	SUPERLU_FREE(vv[i]);
kusano 7d535a
    SUPERLU_FREE(vv);
kusano 7d535a
    for (i = 0; i < im; i++)
kusano 7d535a
    {
kusano 7d535a
	SUPERLU_FREE(hh[i]);
kusano 7d535a
	SUPERLU_FREE(z[i]);
kusano 7d535a
    }
kusano 7d535a
    SUPERLU_FREE(hh);
kusano 7d535a
    SUPERLU_FREE(z);
kusano 7d535a
    SUPERLU_FREE(c);
kusano 7d535a
    SUPERLU_FREE(s);
kusano 7d535a
    SUPERLU_FREE(rs);
kusano 7d535a
kusano 7d535a
    *itmax = its;
kusano 7d535a
kusano 7d535a
    return retval;
kusano 7d535a
} /*----end of fgmr ----*/