kusano 7d535a
kusano 7d535a
/*! @file cfgmr.c
kusano 7d535a
 * \brief flexible GMRES from ITSOL developed by Yousef Saad.
kusano 7d535a
 */
kusano 7d535a
kusano 7d535a
/* ITSOL COPYRIGHT
kusano 7d535a
kusano 7d535a
Copyright (C) 2006, the University of Minnesota 
kusano 7d535a
kusano 7d535a
ITSOL is free software; you can redistribute it and/or modify it under
kusano 7d535a
the terms of  the GNU General Public License as  published by the Free
kusano 7d535a
Software Foundation [version 2 of  the License, or any later version]
kusano 7d535a
For details, see 
kusano 7d535a
kusano 7d535a
http://www.gnu.org/copyleft/gpl.html
kusano 7d535a
kusano 7d535a
A copy of the GNU licencing agreement is attached to the ITSOL package
kusano 7d535a
in the file GNU.  For additional information contact the Free Software
kusano 7d535a
Foundation Inc., 65 Mass Ave, Cambridge, MA 02139, USA.
kusano 7d535a
kusano 7d535a
DISCLAIMER
kusano 7d535a
----------
kusano 7d535a
kusano 7d535a
This program  is distributed in the  hope that it will  be useful, but
kusano 7d535a
WITHOUT   ANY  WARRANTY;   without  even   the  implied   warranty  of
kusano 7d535a
MERCHANTABILITY  or FITNESS  FOR A  PARTICULAR PURPOSE.   See  the GNU
kusano 7d535a
General Public License for more details. 
kusano 7d535a
kusano 7d535a
For information on ITSOL contact saad@cs.umn.edu
kusano 7d535a
*/
kusano 7d535a
kusano 7d535a
#include "slu_cdefs.h"
kusano 7d535a
kusano 7d535a
#define  epsmac  1.0e-16
kusano 7d535a
kusano 7d535a
extern void cdotc_(complex *, int *, complex [], int *, complex [], int *);
kusano 7d535a
extern float scnrm2_(int *, complex [], int *);
kusano 7d535a
kusano 7d535a
kusano 7d535a
int cfgmr(int n,
kusano 7d535a
     void (*cmatvec) (complex, complex[], complex, complex[]),
kusano 7d535a
     void (*cpsolve) (int, complex[], complex[]),
kusano 7d535a
     complex *rhs, complex *sol, double tol, int im, int *itmax, FILE * fits)
kusano 7d535a
{
kusano 7d535a
/*----------------------------------------------------------------------
kusano 7d535a
|                 *** Preconditioned FGMRES ***
kusano 7d535a
+-----------------------------------------------------------------------
kusano 7d535a
| This is a simple version of the ARMS preconditioned FGMRES algorithm.
kusano 7d535a
+-----------------------------------------------------------------------
kusano 7d535a
| Y. S. Dec. 2000. -- Apr. 2008
kusano 7d535a
+-----------------------------------------------------------------------
kusano 7d535a
| on entry:
kusano 7d535a
|----------
kusano 7d535a
|
kusano 7d535a
| rhs     = real vector of length n containing the right hand side.
kusano 7d535a
| sol     = real vector of length n containing an initial guess to the
kusano 7d535a
|           solution on input.
kusano 7d535a
| tol     = tolerance for stopping iteration
kusano 7d535a
| im      = Krylov subspace dimension
kusano 7d535a
| (itmax) = max number of iterations allowed.
kusano 7d535a
| fits    = NULL: no output
kusano 7d535a
|        != NULL: file handle to output " resid vs time and its"
kusano 7d535a
|
kusano 7d535a
| on return:
kusano 7d535a
|----------
kusano 7d535a
| fgmr      int =  0 --> successful return.
kusano 7d535a
|           int =  1 --> convergence not achieved in itmax iterations.
kusano 7d535a
| sol     = contains an approximate solution (upon successful return).
kusano 7d535a
| itmax   = has changed. It now contains the number of steps required
kusano 7d535a
|           to converge --
kusano 7d535a
+-----------------------------------------------------------------------
kusano 7d535a
| internal work arrays:
kusano 7d535a
|----------
kusano 7d535a
| vv      = work array of length [im+1][n] (used to store the Arnoldi
kusano 7d535a
|           basis)
kusano 7d535a
| hh      = work array of length [im][im+1] (Householder matrix)
kusano 7d535a
| z       = work array of length [im][n] to store preconditioned vectors
kusano 7d535a
+-----------------------------------------------------------------------
kusano 7d535a
| subroutines called :
kusano 7d535a
| matvec - matrix-vector multiplication operation
kusano 7d535a
| psolve - (right) preconditionning operation
kusano 7d535a
|	   psolve can be a NULL pointer (GMRES without preconditioner)
kusano 7d535a
+---------------------------------------------------------------------*/
kusano 7d535a
kusano 7d535a
    int maxits = *itmax;
kusano 7d535a
    int i, i1, ii, j, k, k1, its, retval, i_1 = 1, i_2 = 2;
kusano 7d535a
    float beta, eps1 = 0.0, t, t0, gam;
kusano 7d535a
    complex **hh, *c, *s, *rs;
kusano 7d535a
    complex **vv, **z, tt;
kusano 7d535a
    complex zero = {0.0, 0.0};
kusano 7d535a
    complex one = {1.0, 0.0};
kusano 7d535a
    complex tt1, tt2;
kusano 7d535a
kusano 7d535a
    its = 0;
kusano 7d535a
    vv = (complex **)SUPERLU_MALLOC((im + 1) * sizeof(complex *));
kusano 7d535a
    for (i = 0; i <= im; i++) vv[i] = complexMalloc(n);
kusano 7d535a
    z = (complex **)SUPERLU_MALLOC(im * sizeof(complex *));
kusano 7d535a
    hh = (complex **)SUPERLU_MALLOC(im * sizeof(complex *));
kusano 7d535a
    for (i = 0; i < im; i++)
kusano 7d535a
    {
kusano 7d535a
	hh[i] = complexMalloc(i + 2);
kusano 7d535a
	z[i] = complexMalloc(n);
kusano 7d535a
    }
kusano 7d535a
    c = complexMalloc(im);
kusano 7d535a
    s = complexMalloc(im);
kusano 7d535a
    rs = complexMalloc(im + 1);
kusano 7d535a
kusano 7d535a
    /*---- outer loop starts here ----*/
kusano 7d535a
    do
kusano 7d535a
    {
kusano 7d535a
	/*---- compute initial residual vector ----*/
kusano 7d535a
	cmatvec(one, sol, zero, vv[0]);
kusano 7d535a
	for (j = 0; j < n; j++)
kusano 7d535a
	    c_sub(&vv[0][j], &rhs[j], &vv[0][j]);	/* vv[0]= initial residual */
kusano 7d535a
	beta = scnrm2_(&n, vv[0], &i_1);
kusano 7d535a
kusano 7d535a
	/*---- print info if fits != null ----*/
kusano 7d535a
	if (fits != NULL && its == 0)
kusano 7d535a
	    fprintf(fits, "%8d   %10.2e\n", its, beta);
kusano 7d535a
	/*if ( beta <= tol * dnrm2_(&n, rhs, &i_1) )*/
kusano 7d535a
	if ( !(beta > tol * scnrm2_(&n, rhs, &i_1)) )
kusano 7d535a
	    break;
kusano 7d535a
	t = 1.0 / beta;
kusano 7d535a
kusano 7d535a
	/*---- normalize: vv[0] = vv[0] / beta ----*/
kusano 7d535a
	for (j = 0; j < n; j++)
kusano 7d535a
	    cs_mult(&vv[0][j], &vv[0][j], t);
kusano 7d535a
	if (its == 0)
kusano 7d535a
	    eps1 = tol * beta;
kusano 7d535a
kusano 7d535a
	/*---- initialize 1-st term of rhs of hessenberg system ----*/
kusano 7d535a
	rs[0].r = beta;
kusano 7d535a
	rs[0].i = 0.0;
kusano 7d535a
	for (i = 0; i < im; i++)
kusano 7d535a
	{
kusano 7d535a
	    its++;
kusano 7d535a
	    i1 = i + 1;
kusano 7d535a
kusano 7d535a
	    /*------------------------------------------------------------
kusano 7d535a
	    |  (Right) Preconditioning Operation   z_{j} = M^{-1} v_{j}
kusano 7d535a
	    +-----------------------------------------------------------*/
kusano 7d535a
	    if (cpsolve)
kusano 7d535a
		cpsolve(n, z[i], vv[i]);
kusano 7d535a
	    else
kusano 7d535a
		ccopy_(&n, vv[i], &i_1, z[i], &i_1);
kusano 7d535a
kusano 7d535a
	    /*---- matvec operation w = A z_{j} = A M^{-1} v_{j} ----*/
kusano 7d535a
	    cmatvec(one, z[i], zero, vv[i1]);
kusano 7d535a
kusano 7d535a
	    /*------------------------------------------------------------
kusano 7d535a
	    |     modified gram - schmidt...
kusano 7d535a
	    |     h_{i,j} = (w,v_{i})
kusano 7d535a
	    |     w  = w - h_{i,j} v_{i}
kusano 7d535a
	    +------------------------------------------------------------*/
kusano 7d535a
	    t0 = scnrm2_(&n, vv[i1], &i_1);
kusano 7d535a
	    for (j = 0; j <= i; j++)
kusano 7d535a
	    {
kusano 7d535a
		complex negt;
kusano 7d535a
#if 0
kusano 7d535a
		cdotc_(&tt, &n, vv[j], &i_1, vv[i1], &i_1);
kusano 7d535a
#else
kusano 7d535a
		tt = zero;
kusano 7d535a
		for (k = 0; k < n; ++k) {
kusano 7d535a
		    cc_conj(&tt1, &vv[j][k]);
kusano 7d535a
		    cc_mult(&tt2, &tt1, &vv[i1][k]);
kusano 7d535a
		    c_add(&tt, &tt, &tt2);
kusano 7d535a
		}
kusano 7d535a
#endif
kusano 7d535a
		hh[i][j] = tt;
kusano 7d535a
		negt.r = -tt.r;
kusano 7d535a
		negt.i = -tt.i;
kusano 7d535a
		caxpy_(&n, &negt, vv[j], &i_1, vv[i1], &i_1);
kusano 7d535a
	    }
kusano 7d535a
kusano 7d535a
	    /*---- h_{j+1,j} = ||w||_{2} ----*/
kusano 7d535a
	    t = scnrm2_(&n, vv[i1], &i_1);
kusano 7d535a
	    while (t < 0.5 * t0)
kusano 7d535a
	    {
kusano 7d535a
		t0 = t;
kusano 7d535a
		for (j = 0; j <= i; j++)
kusano 7d535a
		{
kusano 7d535a
		    complex negt;
kusano 7d535a
#if 0
kusano 7d535a
		    cdotc_(&tt, &n, vv[j], &i_1, vv[i1], &i_1);
kusano 7d535a
#else
kusano 7d535a
   	            tt = zero;
kusano 7d535a
		    for (k = 0; k < n; ++k) {
kusano 7d535a
		        cc_conj(&tt1, &vv[j][k]);
kusano 7d535a
		        cc_mult(&tt2, &tt1, &vv[i1][k]);
kusano 7d535a
		        c_add(&tt, &tt, &tt2);
kusano 7d535a
		    }
kusano 7d535a
#endif
kusano 7d535a
		    c_add(&hh[i][j], &hh[i][j], &tt);
kusano 7d535a
		    negt.r = -tt.r;
kusano 7d535a
		    negt.i = -tt.i;
kusano 7d535a
		    caxpy_(&n, &negt, vv[j], &i_1, vv[i1], &i_1);
kusano 7d535a
		}
kusano 7d535a
		t = scnrm2_(&n, vv[i1], &i_1);
kusano 7d535a
	    }
kusano 7d535a
kusano 7d535a
	    hh[i][i1].r = t;
kusano 7d535a
	    hh[i][i1].i = 0.0;
kusano 7d535a
kusano 7d535a
	    if (t != 0.0)
kusano 7d535a
	    {
kusano 7d535a
		/*---- v_{j+1} = w / h_{j+1,j} ----*/
kusano 7d535a
		t = 1.0 / t;
kusano 7d535a
		for (k = 0; k < n; k++)
kusano 7d535a
	            cs_mult(&vv[i1][k], &vv[i1][k], t);
kusano 7d535a
	    }
kusano 7d535a
	    /*---------------------------------------------------
kusano 7d535a
	    |     done with modified gram schimdt and arnoldi step
kusano 7d535a
	    |     now  update factorization of hh
kusano 7d535a
	    +--------------------------------------------------*/
kusano 7d535a
kusano 7d535a
	    /*--------------------------------------------------------
kusano 7d535a
	    |   perform previous transformations  on i-th column of h
kusano 7d535a
	    +-------------------------------------------------------*/
kusano 7d535a
	    for (k = 1; k <= i; k++)
kusano 7d535a
	    {
kusano 7d535a
		k1 = k - 1;
kusano 7d535a
		tt = hh[i][k1];
kusano 7d535a
                cc_mult(&tt1, &c[k1], &tt);
kusano 7d535a
                cc_mult(&tt2, &s[k1], &hh[i][k]);
kusano 7d535a
                c_add(&hh[i][k1], &tt1, &tt2);
kusano 7d535a
kusano 7d535a
                cc_mult(&tt1, &s[k1], &tt);
kusano 7d535a
                cc_mult(&tt2, &c[k1], &hh[i][k]);
kusano 7d535a
                c_sub(&hh[i][k], &tt2, &tt1);
kusano 7d535a
	    }
kusano 7d535a
kusano 7d535a
	    gam = scnrm2_(&i_2, &hh[i][i], &i_1);
kusano 7d535a
kusano 7d535a
	    /*---------------------------------------------------
kusano 7d535a
	    |     if gamma is zero then any small value will do
kusano 7d535a
	    |     affect only residual estimate
kusano 7d535a
	    +--------------------------------------------------*/
kusano 7d535a
	    /* if (gam == 0.0) gam = epsmac; */
kusano 7d535a
kusano 7d535a
	    /*---- get next plane rotation ---*/
kusano 7d535a
	    if (gam == 0.0)
kusano 7d535a
	    {
kusano 7d535a
		c[i] = one;
kusano 7d535a
		s[i] = zero;
kusano 7d535a
	    }
kusano 7d535a
            else
kusano 7d535a
	    {
kusano 7d535a
                gam = 1.0 / gam;
kusano 7d535a
		cs_mult(&c[i], &hh[i][i], gam);
kusano 7d535a
		cs_mult(&s[i], &hh[i][i1], gam);
kusano 7d535a
	    }
kusano 7d535a
kusano 7d535a
	    cc_mult(&rs[i1], &s[i], &rs[i]);
kusano 7d535a
            rs[i1].r = -rs[i1].r;  rs[i1].i = -rs[i1].i;
kusano 7d535a
	    cc_mult(&rs[i], &c[i], &rs[i]);
kusano 7d535a
kusano 7d535a
	    /*----------------------------------------------------
kusano 7d535a
	    |   determine residual norm and test for convergence
kusano 7d535a
	    +---------------------------------------------------*/
kusano 7d535a
            cc_mult(&tt1, &c[i], &hh[i][i]);
kusano 7d535a
            cc_mult(&tt2, &s[i], &hh[i][i1]);
kusano 7d535a
            c_add(&hh[i][i], &tt1, &tt2);
kusano 7d535a
            beta = c_abs1(&rs[i1]);
kusano 7d535a
	    if (fits != NULL)
kusano 7d535a
		fprintf(fits, "%8d   %10.2e\n", its, beta);
kusano 7d535a
	    if (beta <= eps1 || its >= maxits)
kusano 7d535a
		break;
kusano 7d535a
	}
kusano 7d535a
kusano 7d535a
	if (i == im) i--;
kusano 7d535a
kusano 7d535a
	/*---- now compute solution. 1st, solve upper triangular system ----*/
kusano 7d535a
	c_div(&rs[i], &rs[i], &hh[i][i]);
kusano 7d535a
kusano 7d535a
	for (ii = 1; ii <= i; ii++)
kusano 7d535a
	{
kusano 7d535a
	    k = i - ii;
kusano 7d535a
	    k1 = k + 1;
kusano 7d535a
	    tt = rs[k];
kusano 7d535a
	    for (j = k1; j <= i; j++) {
kusano 7d535a
                cc_mult(&tt1, &hh[j][k], &rs[j]);
kusano 7d535a
		c_sub(&tt, &tt, &tt1);
kusano 7d535a
            }
kusano 7d535a
            c_div(&rs[k], &tt, &hh[k][k]);
kusano 7d535a
	}
kusano 7d535a
kusano 7d535a
	/*---- linear combination of v[i]'s to get sol. ----*/
kusano 7d535a
	for (j = 0; j <= i; j++)
kusano 7d535a
	{
kusano 7d535a
	    tt = rs[j];
kusano 7d535a
	    for (k = 0; k < n; k++) {
kusano 7d535a
                cc_mult(&tt1, &tt, &z[j][k]);
kusano 7d535a
		c_add(&sol[k], &sol[k], &tt1);
kusano 7d535a
            }
kusano 7d535a
	}
kusano 7d535a
kusano 7d535a
	/* calculate the residual and output */
kusano 7d535a
	cmatvec(one, sol, zero, vv[0]);
kusano 7d535a
	for (j = 0; j < n; j++)
kusano 7d535a
	    c_sub(&vv[0][j], &rhs[j], &vv[0][j]);/* vv[0]= initial residual */
kusano 7d535a
kusano 7d535a
	/*---- print info if fits != null ----*/
kusano 7d535a
	beta = scnrm2_(&n, vv[0], &i_1);
kusano 7d535a
kusano 7d535a
	/*---- restart outer loop if needed ----*/
kusano 7d535a
	/*if (beta >= eps1 / tol)*/
kusano 7d535a
	if ( !(beta < eps1 / tol) )
kusano 7d535a
	{
kusano 7d535a
	    its = maxits + 10;
kusano 7d535a
	    break;
kusano 7d535a
	}
kusano 7d535a
	if (beta <= eps1)
kusano 7d535a
	    break;
kusano 7d535a
    } while(its < maxits);
kusano 7d535a
kusano 7d535a
    retval = (its >= maxits);
kusano 7d535a
    for (i = 0; i <= im; i++)
kusano 7d535a
	SUPERLU_FREE(vv[i]);
kusano 7d535a
    SUPERLU_FREE(vv);
kusano 7d535a
    for (i = 0; i < im; i++)
kusano 7d535a
    {
kusano 7d535a
	SUPERLU_FREE(hh[i]);
kusano 7d535a
	SUPERLU_FREE(z[i]);
kusano 7d535a
    }
kusano 7d535a
    SUPERLU_FREE(hh);
kusano 7d535a
    SUPERLU_FREE(z);
kusano 7d535a
    SUPERLU_FREE(c);
kusano 7d535a
    SUPERLU_FREE(s);
kusano 7d535a
    SUPERLU_FREE(rs);
kusano 7d535a
kusano 7d535a
    *itmax = its;
kusano 7d535a
kusano 7d535a
    return retval;
kusano 7d535a
} /*----end of fgmr ----*/