kusano 7d535a
#include <stdio.h></stdio.h>
kusano 7d535a
#include <stdlib.h></stdlib.h>
kusano 7d535a
#include <string.h></string.h>
kusano 7d535a
#include <math.h></math.h>
kusano 7d535a
#include "slu_ddefs.h"
kusano 7d535a
kusano 7d535a
#define  epsmac  1.0e-16
kusano 7d535a
extern double ddot_(int *, double [], int *, double [], int *);
kusano 7d535a
extern double dnrm2_(int *, double [], int *);
kusano 7d535a
extern void daxpy_(int *, double *, double [], int *, double [], int *);
kusano 7d535a
extern double dcopy_(int *, double [], int *, double [], int *);
kusano 7d535a
kusano 7d535a
int fgmr(int n,
kusano 7d535a
     void (*matvec) (double, double[], double, double[]),
kusano 7d535a
     void (*psolve) (int, double[], double[]),
kusano 7d535a
     double *rhs, double *sol, double tol, int im, int *itmax, FILE * fits)
kusano 7d535a
{
kusano 7d535a
/*----------------------------------------------------------------------
kusano 7d535a
|                 *** Preconditioned FGMRES ***
kusano 7d535a
+-----------------------------------------------------------------------
kusano 7d535a
| This is a simple version of the ARMS preconditioned FGMRES algorithm.
kusano 7d535a
+-----------------------------------------------------------------------
kusano 7d535a
| Y. S. Dec. 2000. -- Apr. 2008
kusano 7d535a
+-----------------------------------------------------------------------
kusano 7d535a
| on entry:
kusano 7d535a
|----------
kusano 7d535a
|
kusano 7d535a
| rhs     = real vector of length n containing the right hand side.
kusano 7d535a
| sol     = real vector of length n containing an initial guess to the
kusano 7d535a
|           solution on input.
kusano 7d535a
| tol     = tolerance for stopping iteration
kusano 7d535a
| im      = Krylov subspace dimension
kusano 7d535a
| (itmax) = max number of iterations allowed.
kusano 7d535a
| fits    = NULL: no output
kusano 7d535a
|        != NULL: file handle to output " resid vs time and its"
kusano 7d535a
|
kusano 7d535a
| on return:
kusano 7d535a
|----------
kusano 7d535a
| fgmr      int =  0 --> successful return.
kusano 7d535a
|           int =  1 --> convergence not achieved in itmax iterations.
kusano 7d535a
| sol     = contains an approximate solution (upon successful return).
kusano 7d535a
| itmax   = has changed. It now contains the number of steps required
kusano 7d535a
|           to converge --
kusano 7d535a
+-----------------------------------------------------------------------
kusano 7d535a
| internal work arrays:
kusano 7d535a
|----------
kusano 7d535a
| vv      = work array of length [im+1][n] (used to store the Arnoldi
kusano 7d535a
|           basis)
kusano 7d535a
| hh      = work array of length [im][im+1] (Householder matrix)
kusano 7d535a
| z       = work array of length [im][n] to store preconditioned vectors
kusano 7d535a
+-----------------------------------------------------------------------
kusano 7d535a
| subroutines called :
kusano 7d535a
| matvec - matrix-vector multiplication operation
kusano 7d535a
| psolve - (right) preconditionning operation
kusano 7d535a
|	   psolve can be a NULL pointer (GMRES without preconditioner)
kusano 7d535a
+---------------------------------------------------------------------*/
kusano 7d535a
kusano 7d535a
    int maxits = *itmax;
kusano 7d535a
    int i, i1, ii, j, k, k1, its, retval, i_1 = 1;
kusano 7d535a
    double **hh, *c, *s, *rs, t, t0;
kusano 7d535a
    double beta, eps1 = 0.0, gam, **vv, **z;
kusano 7d535a
kusano 7d535a
    its = 0;
kusano 7d535a
    vv = (double **)SUPERLU_MALLOC((im + 1) * sizeof(double *));
kusano 7d535a
    for (i = 0; i <= im; i++)
kusano 7d535a
	vv[i] = doubleMalloc(n);
kusano 7d535a
    z = (double **)SUPERLU_MALLOC(im * sizeof(double *));
kusano 7d535a
    hh = (double **)SUPERLU_MALLOC(im * sizeof(double *));
kusano 7d535a
    for (i = 0; i < im; i++)
kusano 7d535a
    {
kusano 7d535a
	hh[i] = doubleMalloc(i + 2);
kusano 7d535a
	z[i] = doubleMalloc(n);
kusano 7d535a
    }
kusano 7d535a
    c = doubleMalloc(im);
kusano 7d535a
    s = doubleMalloc(im);
kusano 7d535a
    rs = doubleMalloc(im + 1);
kusano 7d535a
kusano 7d535a
    /*---- outer loop starts here ----*/
kusano 7d535a
    do
kusano 7d535a
    {
kusano 7d535a
	/*---- compute initial residual vector ----*/
kusano 7d535a
	matvec(1.0, sol, 0.0, vv[0]);
kusano 7d535a
	for (j = 0; j < n; j++)
kusano 7d535a
	    vv[0][j] = rhs[j] - vv[0][j];	/* vv[0]= initial residual */
kusano 7d535a
	beta = dnrm2_(&n, vv[0], &i_1);
kusano 7d535a
kusano 7d535a
	/*---- print info if fits != null ----*/
kusano 7d535a
	if (fits != NULL && its == 0)
kusano 7d535a
	    fprintf(fits, "%8d   %10.2e\n", its, beta);
kusano 7d535a
	/*if ( beta < tol * dnrm2_(&n, rhs, &i_1) )*/
kusano 7d535a
	if ( !(beta >= tol * dnrm2_(&n, rhs, &i_1)) )
kusano 7d535a
	    break;
kusano 7d535a
	t = 1.0 / beta;
kusano 7d535a
kusano 7d535a
	/*---- normalize: vv[0] = vv[0] / beta ----*/
kusano 7d535a
	for (j = 0; j < n; j++)
kusano 7d535a
	    vv[0][j] = vv[0][j] * t;
kusano 7d535a
	if (its == 0)
kusano 7d535a
	    eps1 = tol * beta;
kusano 7d535a
kusano 7d535a
	/*---- initialize 1-st term of rhs of hessenberg system ----*/
kusano 7d535a
	rs[0] = beta;
kusano 7d535a
	for (i = 0; i < im; i++)
kusano 7d535a
	{
kusano 7d535a
	    its++;
kusano 7d535a
	    i1 = i + 1;
kusano 7d535a
kusano 7d535a
	    /*------------------------------------------------------------
kusano 7d535a
	    |  (Right) Preconditioning Operation   z_{j} = M^{-1} v_{j}
kusano 7d535a
	    +-----------------------------------------------------------*/
kusano 7d535a
	    if (psolve)
kusano 7d535a
		psolve(n, z[i], vv[i]);
kusano 7d535a
	    else
kusano 7d535a
		dcopy_(&n, vv[i], &i_1, z[i], &i_1);
kusano 7d535a
kusano 7d535a
	    /*---- matvec operation w = A z_{j} = A M^{-1} v_{j} ----*/
kusano 7d535a
	    matvec(1.0, z[i], 0.0, vv[i1]);
kusano 7d535a
kusano 7d535a
	    /*------------------------------------------------------------
kusano 7d535a
	    |     modified gram - schmidt...
kusano 7d535a
	    |     h_{i,j} = (w,v_{i})
kusano 7d535a
	    |     w  = w - h_{i,j} v_{i}
kusano 7d535a
	    +------------------------------------------------------------*/
kusano 7d535a
	    t0 = dnrm2_(&n, vv[i1], &i_1);
kusano 7d535a
	    for (j = 0; j <= i; j++)
kusano 7d535a
	    {
kusano 7d535a
		double negt;
kusano 7d535a
		t = ddot_(&n, vv[j], &i_1, vv[i1], &i_1);
kusano 7d535a
		hh[i][j] = t;
kusano 7d535a
		negt = -t;
kusano 7d535a
		daxpy_(&n, &negt, vv[j], &i_1, vv[i1], &i_1);
kusano 7d535a
	    }
kusano 7d535a
kusano 7d535a
	    /*---- h_{j+1,j} = ||w||_{2} ----*/
kusano 7d535a
	    t = dnrm2_(&n, vv[i1], &i_1);
kusano 7d535a
	    while (t < 0.5 * t0)
kusano 7d535a
	    {
kusano 7d535a
		t0 = t;
kusano 7d535a
		for (j = 0; j <= i; j++)
kusano 7d535a
		{
kusano 7d535a
		    double negt;
kusano 7d535a
		    t = ddot_(&n, vv[j], &i_1, vv[i1], &i_1);
kusano 7d535a
		    hh[i][j] += t;
kusano 7d535a
		    negt = -t;
kusano 7d535a
		    daxpy_(&n, &negt, vv[j], &i_1, vv[i1], &i_1);
kusano 7d535a
		}
kusano 7d535a
		t = dnrm2_(&n, vv[i1], &i_1);
kusano 7d535a
	    }
kusano 7d535a
	    hh[i][i1] = t;
kusano 7d535a
	    if (t != 0.0)
kusano 7d535a
	    {
kusano 7d535a
		/*---- v_{j+1} = w / h_{j+1,j} ----*/
kusano 7d535a
		t = 1.0 / t;
kusano 7d535a
		for (k = 0; k < n; k++)
kusano 7d535a
		    vv[i1][k] = vv[i1][k] * t;
kusano 7d535a
	    }
kusano 7d535a
	    /*---------------------------------------------------
kusano 7d535a
	    |     done with modified gram schimdt and arnoldi step
kusano 7d535a
	    |     now  update factorization of hh
kusano 7d535a
	    +--------------------------------------------------*/
kusano 7d535a
kusano 7d535a
	    /*--------------------------------------------------------
kusano 7d535a
	    |   perform previous transformations  on i-th column of h
kusano 7d535a
	    +-------------------------------------------------------*/
kusano 7d535a
	    for (k = 1; k <= i; k++)
kusano 7d535a
	    {
kusano 7d535a
		k1 = k - 1;
kusano 7d535a
		t = hh[i][k1];
kusano 7d535a
		hh[i][k1] = c[k1] * t + s[k1] * hh[i][k];
kusano 7d535a
		hh[i][k] = -s[k1] * t + c[k1] * hh[i][k];
kusano 7d535a
	    }
kusano 7d535a
	    gam = sqrt(pow(hh[i][i], 2) + pow(hh[i][i1], 2));
kusano 7d535a
kusano 7d535a
	    /*---------------------------------------------------
kusano 7d535a
	    |     if gamma is zero then any small value will do
kusano 7d535a
	    |     affect only residual estimate
kusano 7d535a
	    +--------------------------------------------------*/
kusano 7d535a
	    /* if (gam == 0.0) gam = epsmac; */
kusano 7d535a
kusano 7d535a
	    /*---- get next plane rotation ---*/
kusano 7d535a
	    if (gam > 0.0)
kusano 7d535a
	    {
kusano 7d535a
		c[i] = hh[i][i] / gam;
kusano 7d535a
		s[i] = hh[i][i1] / gam;
kusano 7d535a
	    }
kusano 7d535a
	    else
kusano 7d535a
	    {
kusano 7d535a
		c[i] = 1.0;
kusano 7d535a
		s[i] = 0.0;
kusano 7d535a
	    }
kusano 7d535a
	    rs[i1] = -s[i] * rs[i];
kusano 7d535a
	    rs[i] = c[i] * rs[i];
kusano 7d535a
kusano 7d535a
	    /*----------------------------------------------------
kusano 7d535a
	    |   determine residual norm and test for convergence
kusano 7d535a
	    +---------------------------------------------------*/
kusano 7d535a
	    hh[i][i] = c[i] * hh[i][i] + s[i] * hh[i][i1];
kusano 7d535a
	    beta = fabs(rs[i1]);
kusano 7d535a
	    if (fits != NULL)
kusano 7d535a
		fprintf(fits, "%8d   %10.2e\n", its, beta);
kusano 7d535a
	    if (beta <= eps1 || its >= maxits)
kusano 7d535a
		break;
kusano 7d535a
	}
kusano 7d535a
kusano 7d535a
	if (i == im) i--;
kusano 7d535a
	/*---- now compute solution. 1st, solve upper triangular system ----*/
kusano 7d535a
	rs[i] = rs[i] / hh[i][i];
kusano 7d535a
	for (ii = 1; ii <= i; ii++)
kusano 7d535a
	{
kusano 7d535a
	    k = i - ii;
kusano 7d535a
	    k1 = k + 1;
kusano 7d535a
	    t = rs[k];
kusano 7d535a
	    for (j = k1; j <= i; j++)
kusano 7d535a
		t = t - hh[j][k] * rs[j];
kusano 7d535a
	    rs[k] = t / hh[k][k];
kusano 7d535a
	}
kusano 7d535a
kusano 7d535a
	/*---- linear combination of v[i]'s to get sol. ----*/
kusano 7d535a
	for (j = 0; j <= i; j++)
kusano 7d535a
	{
kusano 7d535a
	    t = rs[j];
kusano 7d535a
	    for (k = 0; k < n; k++)
kusano 7d535a
		sol[k] += t * z[j][k];
kusano 7d535a
	}
kusano 7d535a
kusano 7d535a
	/* calculate the residual and output */
kusano 7d535a
	matvec(1.0, sol, 0.0, vv[0]);
kusano 7d535a
	for (j = 0; j < n; j++)
kusano 7d535a
	    vv[0][j] = rhs[j] - vv[0][j];	/* vv[0]= initial residual */
kusano 7d535a
kusano 7d535a
	/*---- print info if fits != null ----*/
kusano 7d535a
	beta = dnrm2_(&n, vv[0], &i_1);
kusano 7d535a
kusano 7d535a
	/*---- restart outer loop if needed ----*/
kusano 7d535a
	/*if (beta >= eps1 / tol)*/
kusano 7d535a
	if ( !(beta < eps1 / tol) )
kusano 7d535a
	{
kusano 7d535a
	    its = maxits + 10;
kusano 7d535a
	    break;
kusano 7d535a
	}
kusano 7d535a
	if (beta <= eps1)
kusano 7d535a
	    break;
kusano 7d535a
    } while(its < maxits);
kusano 7d535a
kusano 7d535a
    retval = (its >= maxits);
kusano 7d535a
    for (i = 0; i <= im; i++)
kusano 7d535a
	SUPERLU_FREE(vv[i]);
kusano 7d535a
    SUPERLU_FREE(vv);
kusano 7d535a
    for (i = 0; i < im; i++)
kusano 7d535a
    {
kusano 7d535a
	SUPERLU_FREE(hh[i]);
kusano 7d535a
	SUPERLU_FREE(z[i]);
kusano 7d535a
    }
kusano 7d535a
    SUPERLU_FREE(hh);
kusano 7d535a
    SUPERLU_FREE(z);
kusano 7d535a
    SUPERLU_FREE(c);
kusano 7d535a
    SUPERLU_FREE(s);
kusano 7d535a
    SUPERLU_FREE(rs);
kusano 7d535a
kusano 7d535a
    *itmax = its;
kusano 7d535a
kusano 7d535a
    return retval;
kusano 7d535a
} /*----end of fgmr ----*/