kusano 7d535a
kusano 7d535a
/*! @file zmyblas2.c
kusano 7d535a
 * \brief Level 2 Blas operations
kusano 7d535a
 * 
kusano 7d535a
 * 
kusano 7d535a
 * -- SuperLU routine (version 2.0) --
kusano 7d535a
 * Univ. of California Berkeley, Xerox Palo Alto Research Center,
kusano 7d535a
 * and Lawrence Berkeley National Lab.
kusano 7d535a
 * November 15, 1997
kusano 7d535a
 * 
kusano 7d535a
 * Purpose:
kusano 7d535a
 *     Level 2 BLAS operations: solves and matvec, written in C.
kusano 7d535a
 * Note:
kusano 7d535a
 *     This is only used when the system lacks an efficient BLAS library.
kusano 7d535a
 * 
kusano 7d535a
 */
kusano 7d535a
/*
kusano 7d535a
 * File name:		zmyblas2.c
kusano 7d535a
 */
kusano 7d535a
#include "slu_dcomplex.h"
kusano 7d535a
kusano 7d535a
/*! \brief Solves a dense UNIT lower triangular system
kusano 7d535a
 * 
kusano 7d535a
 * The unit lower 
kusano 7d535a
 * triangular matrix is stored in a 2D array M(1:nrow,1:ncol). 
kusano 7d535a
 * The solution will be returned in the rhs vector.
kusano 7d535a
 */
kusano 7d535a
void zlsolve ( int ldm, int ncol, doublecomplex *M, doublecomplex *rhs )
kusano 7d535a
{
kusano 7d535a
    int k;
kusano 7d535a
    doublecomplex x0, x1, x2, x3, temp;
kusano 7d535a
    doublecomplex *M0;
kusano 7d535a
    doublecomplex *Mki0, *Mki1, *Mki2, *Mki3;
kusano 7d535a
    register int firstcol = 0;
kusano 7d535a
kusano 7d535a
    M0 = &M[0];
kusano 7d535a
kusano 7d535a
kusano 7d535a
    while ( firstcol < ncol - 3 ) { /* Do 4 columns */
kusano 7d535a
      	Mki0 = M0 + 1;
kusano 7d535a
      	Mki1 = Mki0 + ldm + 1;
kusano 7d535a
      	Mki2 = Mki1 + ldm + 1;
kusano 7d535a
      	Mki3 = Mki2 + ldm + 1;
kusano 7d535a
kusano 7d535a
      	x0 = rhs[firstcol];
kusano 7d535a
      	zz_mult(&temp, &x0, Mki0); Mki0++;
kusano 7d535a
      	z_sub(&x1, &rhs[firstcol+1], &temp);
kusano 7d535a
      	zz_mult(&temp, &x0, Mki0); Mki0++;
kusano 7d535a
	z_sub(&x2, &rhs[firstcol+2], &temp);
kusano 7d535a
	zz_mult(&temp, &x1, Mki1); Mki1++;
kusano 7d535a
	z_sub(&x2, &x2, &temp);
kusano 7d535a
      	zz_mult(&temp, &x0, Mki0); Mki0++;
kusano 7d535a
	z_sub(&x3, &rhs[firstcol+3], &temp);
kusano 7d535a
	zz_mult(&temp, &x1, Mki1); Mki1++;
kusano 7d535a
	z_sub(&x3, &x3, &temp);
kusano 7d535a
	zz_mult(&temp, &x2, Mki2); Mki2++;
kusano 7d535a
	z_sub(&x3, &x3, &temp);
kusano 7d535a
kusano 7d535a
 	rhs[++firstcol] = x1;
kusano 7d535a
      	rhs[++firstcol] = x2;
kusano 7d535a
      	rhs[++firstcol] = x3;
kusano 7d535a
      	++firstcol;
kusano 7d535a
    
kusano 7d535a
      	for (k = firstcol; k < ncol; k++) {
kusano 7d535a
	    zz_mult(&temp, &x0, Mki0); Mki0++;
kusano 7d535a
	    z_sub(&rhs[k], &rhs[k], &temp);
kusano 7d535a
	    zz_mult(&temp, &x1, Mki1); Mki1++;
kusano 7d535a
	    z_sub(&rhs[k], &rhs[k], &temp);
kusano 7d535a
	    zz_mult(&temp, &x2, Mki2); Mki2++;
kusano 7d535a
	    z_sub(&rhs[k], &rhs[k], &temp);
kusano 7d535a
	    zz_mult(&temp, &x3, Mki3); Mki3++;
kusano 7d535a
	    z_sub(&rhs[k], &rhs[k], &temp);
kusano 7d535a
	}
kusano 7d535a
kusano 7d535a
        M0 += 4 * ldm + 4;
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    if ( firstcol < ncol - 1 ) { /* Do 2 columns */
kusano 7d535a
        Mki0 = M0 + 1;
kusano 7d535a
        Mki1 = Mki0 + ldm + 1;
kusano 7d535a
kusano 7d535a
        x0 = rhs[firstcol];
kusano 7d535a
	zz_mult(&temp, &x0, Mki0); Mki0++;
kusano 7d535a
	z_sub(&x1, &rhs[firstcol+1], &temp);
kusano 7d535a
kusano 7d535a
      	rhs[++firstcol] = x1;
kusano 7d535a
      	++firstcol;
kusano 7d535a
    
kusano 7d535a
      	for (k = firstcol; k < ncol; k++) {
kusano 7d535a
	    zz_mult(&temp, &x0, Mki0); Mki0++;
kusano 7d535a
	    z_sub(&rhs[k], &rhs[k], &temp);
kusano 7d535a
	    zz_mult(&temp, &x1, Mki1); Mki1++;
kusano 7d535a
	    z_sub(&rhs[k], &rhs[k], &temp);
kusano 7d535a
	} 
kusano 7d535a
    }
kusano 7d535a
    
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
/*! \brief Solves a dense upper triangular system. 
kusano 7d535a
 *
kusano 7d535a
 * The upper triangular matrix is
kusano 7d535a
 * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
kusano 7d535a
 * in the rhs vector.
kusano 7d535a
 */
kusano 7d535a
void
kusano 7d535a
zusolve ( ldm, ncol, M, rhs )
kusano 7d535a
int ldm;	/* in */
kusano 7d535a
int ncol;	/* in */
kusano 7d535a
doublecomplex *M;	/* in */
kusano 7d535a
doublecomplex *rhs;	/* modified */
kusano 7d535a
{
kusano 7d535a
    doublecomplex xj, temp;
kusano 7d535a
    int jcol, j, irow;
kusano 7d535a
kusano 7d535a
    jcol = ncol - 1;
kusano 7d535a
kusano 7d535a
    for (j = 0; j < ncol; j++) {
kusano 7d535a
kusano 7d535a
	z_div(&xj, &rhs[jcol], &M[jcol + jcol*ldm]); /* M(jcol, jcol) */
kusano 7d535a
	rhs[jcol] = xj;
kusano 7d535a
	
kusano 7d535a
	for (irow = 0; irow < jcol; irow++) {
kusano 7d535a
	    zz_mult(&temp, &xj, &M[irow+jcol*ldm]); /* M(irow, jcol) */
kusano 7d535a
	    z_sub(&rhs[irow], &rhs[irow], &temp);
kusano 7d535a
	}
kusano 7d535a
kusano 7d535a
	jcol--;
kusano 7d535a
kusano 7d535a
    }
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
kusano 7d535a
/*! \brief Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
kusano 7d535a
 *
kusano 7d535a
 * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
kusano 7d535a
 */
kusano 7d535a
void zmatvec ( ldm, nrow, ncol, M, vec, Mxvec )
kusano 7d535a
int ldm;	/* in -- leading dimension of M */
kusano 7d535a
int nrow;	/* in */ 
kusano 7d535a
int ncol;	/* in */
kusano 7d535a
doublecomplex *M;	/* in */
kusano 7d535a
doublecomplex *vec;	/* in */
kusano 7d535a
doublecomplex *Mxvec;	/* in/out */
kusano 7d535a
{
kusano 7d535a
    doublecomplex vi0, vi1, vi2, vi3;
kusano 7d535a
    doublecomplex *M0, temp;
kusano 7d535a
    doublecomplex *Mki0, *Mki1, *Mki2, *Mki3;
kusano 7d535a
    register int firstcol = 0;
kusano 7d535a
    int k;
kusano 7d535a
kusano 7d535a
    M0 = &M[0];
kusano 7d535a
kusano 7d535a
    while ( firstcol < ncol - 3 ) {	/* Do 4 columns */
kusano 7d535a
	Mki0 = M0;
kusano 7d535a
	Mki1 = Mki0 + ldm;
kusano 7d535a
	Mki2 = Mki1 + ldm;
kusano 7d535a
	Mki3 = Mki2 + ldm;
kusano 7d535a
kusano 7d535a
	vi0 = vec[firstcol++];
kusano 7d535a
	vi1 = vec[firstcol++];
kusano 7d535a
	vi2 = vec[firstcol++];
kusano 7d535a
	vi3 = vec[firstcol++];	
kusano 7d535a
	for (k = 0; k < nrow; k++) {
kusano 7d535a
	    zz_mult(&temp, &vi0, Mki0); Mki0++;
kusano 7d535a
	    z_add(&Mxvec[k], &Mxvec[k], &temp);
kusano 7d535a
	    zz_mult(&temp, &vi1, Mki1); Mki1++;
kusano 7d535a
	    z_add(&Mxvec[k], &Mxvec[k], &temp);
kusano 7d535a
	    zz_mult(&temp, &vi2, Mki2); Mki2++;
kusano 7d535a
	    z_add(&Mxvec[k], &Mxvec[k], &temp);
kusano 7d535a
	    zz_mult(&temp, &vi3, Mki3); Mki3++;
kusano 7d535a
	    z_add(&Mxvec[k], &Mxvec[k], &temp);
kusano 7d535a
	}
kusano 7d535a
kusano 7d535a
	M0 += 4 * ldm;
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    while ( firstcol < ncol ) {		/* Do 1 column */
kusano 7d535a
 	Mki0 = M0;
kusano 7d535a
	vi0 = vec[firstcol++];
kusano 7d535a
	for (k = 0; k < nrow; k++) {
kusano 7d535a
	    zz_mult(&temp, &vi0, Mki0); Mki0++;
kusano 7d535a
	    z_add(&Mxvec[k], &Mxvec[k], &temp);
kusano 7d535a
	}
kusano 7d535a
	M0 += ldm;
kusano 7d535a
    }
kusano 7d535a
	
kusano 7d535a
}
kusano 7d535a