kusano 7d535a
kusano 7d535a
/*! @file csp_blas2.c
kusano 7d535a
 * \brief Sparse BLAS 2, using some dense BLAS 2 operations
kusano 7d535a
 *
kusano 7d535a
 * 
kusano 7d535a
 * -- SuperLU routine (version 3.0) --
kusano 7d535a
 * Univ. of California Berkeley, Xerox Palo Alto Research Center,
kusano 7d535a
 * and Lawrence Berkeley National Lab.
kusano 7d535a
 * October 15, 2003
kusano 7d535a
 * 
kusano 7d535a
 */
kusano 7d535a
/*
kusano 7d535a
 * File name:		csp_blas2.c
kusano 7d535a
 * Purpose:		Sparse BLAS 2, using some dense BLAS 2 operations.
kusano 7d535a
 */
kusano 7d535a
kusano 7d535a
#include "slu_cdefs.h"
kusano 7d535a
kusano 7d535a
/* 
kusano 7d535a
 * Function prototypes 
kusano 7d535a
 */
kusano 7d535a
void cusolve(int, int, complex*, complex*);
kusano 7d535a
void clsolve(int, int, complex*, complex*);
kusano 7d535a
void cmatvec(int, int, int, complex*, complex*, complex*);
kusano 7d535a
kusano 7d535a
/*! \brief Solves one of the systems of equations A*x = b,   or   A'*x = b
kusano 7d535a
 * 
kusano 7d535a
 * 
kusano 7d535a
 *   Purpose
kusano 7d535a
 *   =======
kusano 7d535a
 *
kusano 7d535a
 *   sp_ctrsv() solves one of the systems of equations   
kusano 7d535a
 *       A*x = b,   or   A'*x = b,
kusano 7d535a
 *   where b and x are n element vectors and A is a sparse unit , or   
kusano 7d535a
 *   non-unit, upper or lower triangular matrix.   
kusano 7d535a
 *   No test for singularity or near-singularity is included in this   
kusano 7d535a
 *   routine. Such tests must be performed before calling this routine.   
kusano 7d535a
 *
kusano 7d535a
 *   Parameters   
kusano 7d535a
 *   ==========   
kusano 7d535a
 *
kusano 7d535a
 *   uplo   - (input) char*
kusano 7d535a
 *            On entry, uplo specifies whether the matrix is an upper or   
kusano 7d535a
 *             lower triangular matrix as follows:   
kusano 7d535a
 *                uplo = 'U' or 'u'   A is an upper triangular matrix.   
kusano 7d535a
 *                uplo = 'L' or 'l'   A is a lower triangular matrix.   
kusano 7d535a
 *
kusano 7d535a
 *   trans  - (input) char*
kusano 7d535a
 *             On entry, trans specifies the equations to be solved as   
kusano 7d535a
 *             follows:   
kusano 7d535a
 *                trans = 'N' or 'n'   A*x = b.   
kusano 7d535a
 *                trans = 'T' or 't'   A'*x = b.
kusano 7d535a
 *                trans = 'C' or 'c'   A^H*x = b.   
kusano 7d535a
 *
kusano 7d535a
 *   diag   - (input) char*
kusano 7d535a
 *             On entry, diag specifies whether or not A is unit   
kusano 7d535a
 *             triangular as follows:   
kusano 7d535a
 *                diag = 'U' or 'u'   A is assumed to be unit triangular.   
kusano 7d535a
 *                diag = 'N' or 'n'   A is not assumed to be unit   
kusano 7d535a
 *                                    triangular.   
kusano 7d535a
 *	     
kusano 7d535a
 *   L       - (input) SuperMatrix*
kusano 7d535a
 *	       The factor L from the factorization Pr*A*Pc=L*U. Use
kusano 7d535a
 *             compressed row subscripts storage for supernodes,
kusano 7d535a
 *             i.e., L has types: Stype = SC, Dtype = SLU_C, Mtype = TRLU.
kusano 7d535a
 *
kusano 7d535a
 *   U       - (input) SuperMatrix*
kusano 7d535a
 *	        The factor U from the factorization Pr*A*Pc=L*U.
kusano 7d535a
 *	        U has types: Stype = NC, Dtype = SLU_C, Mtype = TRU.
kusano 7d535a
 *    
kusano 7d535a
 *   x       - (input/output) complex*
kusano 7d535a
 *             Before entry, the incremented array X must contain the n   
kusano 7d535a
 *             element right-hand side vector b. On exit, X is overwritten 
kusano 7d535a
 *             with the solution vector x.
kusano 7d535a
 *
kusano 7d535a
 *   info    - (output) int*
kusano 7d535a
 *             If *info = -i, the i-th argument had an illegal value.
kusano 7d535a
 * 
kusano 7d535a
 */
kusano 7d535a
int
kusano 7d535a
sp_ctrsv(char *uplo, char *trans, char *diag, SuperMatrix *L, 
kusano 7d535a
         SuperMatrix *U, complex *x, SuperLUStat_t *stat, int *info)
kusano 7d535a
{
kusano 7d535a
#ifdef _CRAY
kusano 7d535a
    _fcd ftcs1 = _cptofcd("L", strlen("L")),
kusano 7d535a
	 ftcs2 = _cptofcd("N", strlen("N")),
kusano 7d535a
	 ftcs3 = _cptofcd("U", strlen("U"));
kusano 7d535a
#endif
kusano 7d535a
    SCformat *Lstore;
kusano 7d535a
    NCformat *Ustore;
kusano 7d535a
    complex   *Lval, *Uval;
kusano 7d535a
    int incx = 1, incy = 1;
kusano 7d535a
    complex temp;
kusano 7d535a
    complex alpha = {1.0, 0.0}, beta = {1.0, 0.0};
kusano 7d535a
    complex comp_zero = {0.0, 0.0};
kusano 7d535a
    int nrow;
kusano 7d535a
    int fsupc, nsupr, nsupc, luptr, istart, irow;
kusano 7d535a
    int i, k, iptr, jcol;
kusano 7d535a
    complex *work;
kusano 7d535a
    flops_t solve_ops;
kusano 7d535a
kusano 7d535a
    /* Test the input parameters */
kusano 7d535a
    *info = 0;
kusano 7d535a
    if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
kusano 7d535a
    else if ( !lsame_(trans, "N") && !lsame_(trans, "T") && 
kusano 7d535a
              !lsame_(trans, "C")) *info = -2;
kusano 7d535a
    else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
kusano 7d535a
    else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
kusano 7d535a
    else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
kusano 7d535a
    if ( *info ) {
kusano 7d535a
	i = -(*info);
kusano 7d535a
	xerbla_("sp_ctrsv", &i);
kusano 7d535a
	return 0;
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    Lstore = L->Store;
kusano 7d535a
    Lval = Lstore->nzval;
kusano 7d535a
    Ustore = U->Store;
kusano 7d535a
    Uval = Ustore->nzval;
kusano 7d535a
    solve_ops = 0;
kusano 7d535a
kusano 7d535a
    if ( !(work = complexCalloc(L->nrow)) )
kusano 7d535a
	ABORT("Malloc fails for work in sp_ctrsv().");
kusano 7d535a
    
kusano 7d535a
    if ( lsame_(trans, "N") ) {	/* Form x := inv(A)*x. */
kusano 7d535a
	
kusano 7d535a
	if ( lsame_(uplo, "L") ) {
kusano 7d535a
	    /* Form x := inv(L)*x */
kusano 7d535a
    	    if ( L->nrow == 0 ) return 0; /* Quick return */
kusano 7d535a
	    
kusano 7d535a
	    for (k = 0; k <= Lstore->nsuper; k++) {
kusano 7d535a
		fsupc = L_FST_SUPC(k);
kusano 7d535a
		istart = L_SUB_START(fsupc);
kusano 7d535a
		nsupr = L_SUB_START(fsupc+1) - istart;
kusano 7d535a
		nsupc = L_FST_SUPC(k+1) - fsupc;
kusano 7d535a
		luptr = L_NZ_START(fsupc);
kusano 7d535a
		nrow = nsupr - nsupc;
kusano 7d535a
kusano 7d535a
                /* 1 c_div costs 10 flops */
kusano 7d535a
	        solve_ops += 4 * nsupc * (nsupc - 1) + 10 * nsupc;
kusano 7d535a
	        solve_ops += 8 * nrow * nsupc;
kusano 7d535a
kusano 7d535a
		if ( nsupc == 1 ) {
kusano 7d535a
		    for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
kusano 7d535a
			irow = L_SUB(iptr);
kusano 7d535a
			++luptr;
kusano 7d535a
			cc_mult(&comp_zero, &x[fsupc], &Lval[luptr]);
kusano 7d535a
			c_sub(&x[irow], &x[irow], &comp_zero);
kusano 7d535a
		    }
kusano 7d535a
		} else {
kusano 7d535a
#ifdef USE_VENDOR_BLAS
kusano 7d535a
#ifdef _CRAY
kusano 7d535a
		    CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
kusano 7d535a
		       	&x[fsupc], &incx);
kusano 7d535a
		
kusano 7d535a
		    CGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
kusano 7d535a
		       	&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
kusano 7d535a
#else
kusano 7d535a
		    ctrsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
kusano 7d535a
		       	&x[fsupc], &incx);
kusano 7d535a
		
kusano 7d535a
		    cgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
kusano 7d535a
		       	&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
kusano 7d535a
#endif
kusano 7d535a
#else
kusano 7d535a
		    clsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
kusano 7d535a
		
kusano 7d535a
		    cmatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
kusano 7d535a
                             &x[fsupc], &work[0] );
kusano 7d535a
#endif		
kusano 7d535a
		
kusano 7d535a
		    iptr = istart + nsupc;
kusano 7d535a
		    for (i = 0; i < nrow; ++i, ++iptr) {
kusano 7d535a
			irow = L_SUB(iptr);
kusano 7d535a
			c_sub(&x[irow], &x[irow], &work[i]); /* Scatter */
kusano 7d535a
			work[i] = comp_zero;
kusano 7d535a
kusano 7d535a
		    }
kusano 7d535a
	 	}
kusano 7d535a
	    } /* for k ... */
kusano 7d535a
	    
kusano 7d535a
	} else {
kusano 7d535a
	    /* Form x := inv(U)*x */
kusano 7d535a
	    
kusano 7d535a
	    if ( U->nrow == 0 ) return 0; /* Quick return */
kusano 7d535a
	    
kusano 7d535a
	    for (k = Lstore->nsuper; k >= 0; k--) {
kusano 7d535a
	    	fsupc = L_FST_SUPC(k);
kusano 7d535a
	    	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
kusano 7d535a
	    	nsupc = L_FST_SUPC(k+1) - fsupc;
kusano 7d535a
	    	luptr = L_NZ_START(fsupc);
kusano 7d535a
		
kusano 7d535a
                /* 1 c_div costs 10 flops */
kusano 7d535a
    	        solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;
kusano 7d535a
kusano 7d535a
		if ( nsupc == 1 ) {
kusano 7d535a
		    c_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
kusano 7d535a
		    for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
kusano 7d535a
			irow = U_SUB(i);
kusano 7d535a
			cc_mult(&comp_zero, &x[fsupc], &Uval[i]);
kusano 7d535a
			c_sub(&x[irow], &x[irow], &comp_zero);
kusano 7d535a
		    }
kusano 7d535a
		} else {
kusano 7d535a
#ifdef USE_VENDOR_BLAS
kusano 7d535a
#ifdef _CRAY
kusano 7d535a
		    CTRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
kusano 7d535a
		       &x[fsupc], &incx);
kusano 7d535a
#else
kusano 7d535a
		    ctrsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
kusano 7d535a
                           &x[fsupc], &incx);
kusano 7d535a
#endif
kusano 7d535a
#else		
kusano 7d535a
		    cusolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
kusano 7d535a
#endif		
kusano 7d535a
kusano 7d535a
		    for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
kusano 7d535a
		        solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
kusano 7d535a
		    	for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); 
kusano 7d535a
				i++) {
kusano 7d535a
			    irow = U_SUB(i);
kusano 7d535a
			cc_mult(&comp_zero, &x[jcol], &Uval[i]);
kusano 7d535a
			c_sub(&x[irow], &x[irow], &comp_zero);
kusano 7d535a
		    	}
kusano 7d535a
                    }
kusano 7d535a
		}
kusano 7d535a
	    } /* for k ... */
kusano 7d535a
	    
kusano 7d535a
	}
kusano 7d535a
    } else if ( lsame_(trans, "T") ) { /* Form x := inv(A')*x */
kusano 7d535a
	
kusano 7d535a
	if ( lsame_(uplo, "L") ) {
kusano 7d535a
	    /* Form x := inv(L')*x */
kusano 7d535a
    	    if ( L->nrow == 0 ) return 0; /* Quick return */
kusano 7d535a
	    
kusano 7d535a
	    for (k = Lstore->nsuper; k >= 0; --k) {
kusano 7d535a
	    	fsupc = L_FST_SUPC(k);
kusano 7d535a
	    	istart = L_SUB_START(fsupc);
kusano 7d535a
	    	nsupr = L_SUB_START(fsupc+1) - istart;
kusano 7d535a
	    	nsupc = L_FST_SUPC(k+1) - fsupc;
kusano 7d535a
	    	luptr = L_NZ_START(fsupc);
kusano 7d535a
kusano 7d535a
		solve_ops += 8 * (nsupr - nsupc) * nsupc;
kusano 7d535a
kusano 7d535a
		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
kusano 7d535a
		    iptr = istart + nsupc;
kusano 7d535a
		    for (i = L_NZ_START(jcol) + nsupc; 
kusano 7d535a
				i < L_NZ_START(jcol+1); i++) {
kusano 7d535a
			irow = L_SUB(iptr);
kusano 7d535a
			cc_mult(&comp_zero, &x[irow], &Lval[i]);
kusano 7d535a
		    	c_sub(&x[jcol], &x[jcol], &comp_zero);
kusano 7d535a
			iptr++;
kusano 7d535a
		    }
kusano 7d535a
		}
kusano 7d535a
		
kusano 7d535a
		if ( nsupc > 1 ) {
kusano 7d535a
		    solve_ops += 4 * nsupc * (nsupc - 1);
kusano 7d535a
#ifdef _CRAY
kusano 7d535a
                    ftcs1 = _cptofcd("L", strlen("L"));
kusano 7d535a
                    ftcs2 = _cptofcd("T", strlen("T"));
kusano 7d535a
                    ftcs3 = _cptofcd("U", strlen("U"));
kusano 7d535a
		    CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
kusano 7d535a
			&x[fsupc], &incx);
kusano 7d535a
#else
kusano 7d535a
		    ctrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
kusano 7d535a
			&x[fsupc], &incx);
kusano 7d535a
#endif
kusano 7d535a
		}
kusano 7d535a
	    }
kusano 7d535a
	} else {
kusano 7d535a
	    /* Form x := inv(U')*x */
kusano 7d535a
	    if ( U->nrow == 0 ) return 0; /* Quick return */
kusano 7d535a
	    
kusano 7d535a
	    for (k = 0; k <= Lstore->nsuper; k++) {
kusano 7d535a
	    	fsupc = L_FST_SUPC(k);
kusano 7d535a
	    	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
kusano 7d535a
	    	nsupc = L_FST_SUPC(k+1) - fsupc;
kusano 7d535a
	    	luptr = L_NZ_START(fsupc);
kusano 7d535a
kusano 7d535a
		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
kusano 7d535a
		    solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
kusano 7d535a
		    for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
kusano 7d535a
			irow = U_SUB(i);
kusano 7d535a
			cc_mult(&comp_zero, &x[irow], &Uval[i]);
kusano 7d535a
		    	c_sub(&x[jcol], &x[jcol], &comp_zero);
kusano 7d535a
		    }
kusano 7d535a
		}
kusano 7d535a
kusano 7d535a
                /* 1 c_div costs 10 flops */
kusano 7d535a
		solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;
kusano 7d535a
kusano 7d535a
		if ( nsupc == 1 ) {
kusano 7d535a
		    c_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
kusano 7d535a
		} else {
kusano 7d535a
#ifdef _CRAY
kusano 7d535a
                    ftcs1 = _cptofcd("U", strlen("U"));
kusano 7d535a
                    ftcs2 = _cptofcd("T", strlen("T"));
kusano 7d535a
                    ftcs3 = _cptofcd("N", strlen("N"));
kusano 7d535a
		    CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
kusano 7d535a
			    &x[fsupc], &incx);
kusano 7d535a
#else
kusano 7d535a
		    ctrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
kusano 7d535a
			    &x[fsupc], &incx);
kusano 7d535a
#endif
kusano 7d535a
		}
kusano 7d535a
	    } /* for k ... */
kusano 7d535a
	}
kusano 7d535a
    } else { /* Form x := conj(inv(A'))*x */
kusano 7d535a
	
kusano 7d535a
	if ( lsame_(uplo, "L") ) {
kusano 7d535a
	    /* Form x := conj(inv(L'))*x */
kusano 7d535a
    	    if ( L->nrow == 0 ) return 0; /* Quick return */
kusano 7d535a
	    
kusano 7d535a
	    for (k = Lstore->nsuper; k >= 0; --k) {
kusano 7d535a
	    	fsupc = L_FST_SUPC(k);
kusano 7d535a
	    	istart = L_SUB_START(fsupc);
kusano 7d535a
	    	nsupr = L_SUB_START(fsupc+1) - istart;
kusano 7d535a
	    	nsupc = L_FST_SUPC(k+1) - fsupc;
kusano 7d535a
	    	luptr = L_NZ_START(fsupc);
kusano 7d535a
kusano 7d535a
		solve_ops += 8 * (nsupr - nsupc) * nsupc;
kusano 7d535a
kusano 7d535a
		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
kusano 7d535a
		    iptr = istart + nsupc;
kusano 7d535a
		    for (i = L_NZ_START(jcol) + nsupc; 
kusano 7d535a
				i < L_NZ_START(jcol+1); i++) {
kusano 7d535a
			irow = L_SUB(iptr);
kusano 7d535a
                        cc_conj(&temp, &Lval[i]);
kusano 7d535a
			cc_mult(&comp_zero, &x[irow], &temp);
kusano 7d535a
		    	c_sub(&x[jcol], &x[jcol], &comp_zero);
kusano 7d535a
			iptr++;
kusano 7d535a
		    }
kusano 7d535a
 		}
kusano 7d535a
 		
kusano 7d535a
 		if ( nsupc > 1 ) {
kusano 7d535a
		    solve_ops += 4 * nsupc * (nsupc - 1);
kusano 7d535a
#ifdef _CRAY
kusano 7d535a
                    ftcs1 = _cptofcd("L", strlen("L"));
kusano 7d535a
                    ftcs2 = _cptofcd(trans, strlen("T"));
kusano 7d535a
                    ftcs3 = _cptofcd("U", strlen("U"));
kusano 7d535a
		    CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
kusano 7d535a
			&x[fsupc], &incx);
kusano 7d535a
#else
kusano 7d535a
                    ctrsv_("L", trans, "U", &nsupc, &Lval[luptr], &nsupr,
kusano 7d535a
                           &x[fsupc], &incx);
kusano 7d535a
#endif
kusano 7d535a
		}
kusano 7d535a
	    }
kusano 7d535a
	} else {
kusano 7d535a
	    /* Form x := conj(inv(U'))*x */
kusano 7d535a
	    if ( U->nrow == 0 ) return 0; /* Quick return */
kusano 7d535a
	    
kusano 7d535a
	    for (k = 0; k <= Lstore->nsuper; k++) {
kusano 7d535a
	    	fsupc = L_FST_SUPC(k);
kusano 7d535a
	    	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
kusano 7d535a
	    	nsupc = L_FST_SUPC(k+1) - fsupc;
kusano 7d535a
	    	luptr = L_NZ_START(fsupc);
kusano 7d535a
kusano 7d535a
		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
kusano 7d535a
		    solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
kusano 7d535a
		    for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
kusano 7d535a
			irow = U_SUB(i);
kusano 7d535a
                        cc_conj(&temp, &Uval[i]);
kusano 7d535a
			cc_mult(&comp_zero, &x[irow], &temp);
kusano 7d535a
		    	c_sub(&x[jcol], &x[jcol], &comp_zero);
kusano 7d535a
		    }
kusano 7d535a
		}
kusano 7d535a
kusano 7d535a
                /* 1 c_div costs 10 flops */
kusano 7d535a
		solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;
kusano 7d535a
 
kusano 7d535a
		if ( nsupc == 1 ) {
kusano 7d535a
                    cc_conj(&temp, &Lval[luptr]);
kusano 7d535a
		    c_div(&x[fsupc], &x[fsupc], &temp);
kusano 7d535a
		} else {
kusano 7d535a
#ifdef _CRAY
kusano 7d535a
                    ftcs1 = _cptofcd("U", strlen("U"));
kusano 7d535a
                    ftcs2 = _cptofcd(trans, strlen("T"));
kusano 7d535a
                    ftcs3 = _cptofcd("N", strlen("N"));
kusano 7d535a
		    CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
kusano 7d535a
			    &x[fsupc], &incx);
kusano 7d535a
#else
kusano 7d535a
                    ctrsv_("U", trans, "N", &nsupc, &Lval[luptr], &nsupr,
kusano 7d535a
                               &x[fsupc], &incx);
kusano 7d535a
#endif
kusano 7d535a
  		}
kusano 7d535a
  	    } /* for k ... */
kusano 7d535a
  	}
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    stat->ops[SOLVE] += solve_ops;
kusano 7d535a
    SUPERLU_FREE(work);
kusano 7d535a
    return 0;
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
kusano 7d535a
kusano 7d535a
/*! \brief Performs one of the matrix-vector operations y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y
kusano 7d535a
 *
kusano 7d535a
 * 
  
kusano 7d535a
 *   Purpose   
kusano 7d535a
 *   =======   
kusano 7d535a
 *
kusano 7d535a
 *   sp_cgemv()  performs one of the matrix-vector operations   
kusano 7d535a
 *      y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   
kusano 7d535a
 *   where alpha and beta are scalars, x and y are vectors and A is a
kusano 7d535a
 *   sparse A->nrow by A->ncol matrix.   
kusano 7d535a
 *
kusano 7d535a
 *   Parameters   
kusano 7d535a
 *   ==========   
kusano 7d535a
 *
kusano 7d535a
 *   TRANS  - (input) char*
kusano 7d535a
 *            On entry, TRANS specifies the operation to be performed as   
kusano 7d535a
 *            follows:   
kusano 7d535a
 *               TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.   
kusano 7d535a
 *               TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.   
kusano 7d535a
 *               TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.   
kusano 7d535a
 *
kusano 7d535a
 *   ALPHA  - (input) complex
kusano 7d535a
 *            On entry, ALPHA specifies the scalar alpha.   
kusano 7d535a
 *
kusano 7d535a
 *   A      - (input) SuperMatrix*
kusano 7d535a
 *            Before entry, the leading m by n part of the array A must   
kusano 7d535a
 *            contain the matrix of coefficients.   
kusano 7d535a
 *
kusano 7d535a
 *   X      - (input) complex*, array of DIMENSION at least   
kusano 7d535a
 *            ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'   
kusano 7d535a
 *           and at least   
kusano 7d535a
 *            ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.   
kusano 7d535a
 *            Before entry, the incremented array X must contain the   
kusano 7d535a
 *            vector x.   
kusano 7d535a
 * 
kusano 7d535a
 *   INCX   - (input) int
kusano 7d535a
 *            On entry, INCX specifies the increment for the elements of   
kusano 7d535a
 *            X. INCX must not be zero.   
kusano 7d535a
 *
kusano 7d535a
 *   BETA   - (input) complex
kusano 7d535a
 *            On entry, BETA specifies the scalar beta. When BETA is   
kusano 7d535a
 *            supplied as zero then Y need not be set on input.   
kusano 7d535a
 *
kusano 7d535a
 *   Y      - (output) complex*,  array of DIMENSION at least   
kusano 7d535a
 *            ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'   
kusano 7d535a
 *            and at least   
kusano 7d535a
 *            ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.   
kusano 7d535a
 *            Before entry with BETA non-zero, the incremented array Y   
kusano 7d535a
 *            must contain the vector y. On exit, Y is overwritten by the 
kusano 7d535a
 *            updated vector y.
kusano 7d535a
 *	      
kusano 7d535a
 *   INCY   - (input) int
kusano 7d535a
 *            On entry, INCY specifies the increment for the elements of   
kusano 7d535a
 *            Y. INCY must not be zero.   
kusano 7d535a
 *
kusano 7d535a
 *    ==== Sparse Level 2 Blas routine.   
kusano 7d535a
 * 
kusano 7d535a
*/
kusano 7d535a
int
kusano 7d535a
sp_cgemv(char *trans, complex alpha, SuperMatrix *A, complex *x, 
kusano 7d535a
	 int incx, complex beta, complex *y, int incy)
kusano 7d535a
{
kusano 7d535a
kusano 7d535a
    /* Local variables */
kusano 7d535a
    NCformat *Astore;
kusano 7d535a
    complex   *Aval;
kusano 7d535a
    int info;
kusano 7d535a
    complex temp, temp1;
kusano 7d535a
    int lenx, leny, i, j, irow;
kusano 7d535a
    int iy, jx, jy, kx, ky;
kusano 7d535a
    int notran;
kusano 7d535a
    complex comp_zero = {0.0, 0.0};
kusano 7d535a
    complex comp_one = {1.0, 0.0};
kusano 7d535a
kusano 7d535a
    notran = lsame_(trans, "N");
kusano 7d535a
    Astore = A->Store;
kusano 7d535a
    Aval = Astore->nzval;
kusano 7d535a
    
kusano 7d535a
    /* Test the input parameters */
kusano 7d535a
    info = 0;
kusano 7d535a
    if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;
kusano 7d535a
    else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
kusano 7d535a
    else if (incx == 0) info = 5;
kusano 7d535a
    else if (incy == 0)	info = 8;
kusano 7d535a
    if (info != 0) {
kusano 7d535a
	xerbla_("sp_cgemv ", &info);
kusano 7d535a
	return 0;
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    /* Quick return if possible. */
kusano 7d535a
    if (A->nrow == 0 || A->ncol == 0 || 
kusano 7d535a
	c_eq(&alpha, &comp_zero) && 
kusano 7d535a
	c_eq(&beta, &comp_one))
kusano 7d535a
	return 0;
kusano 7d535a
kusano 7d535a
kusano 7d535a
    /* Set  LENX  and  LENY, the lengths of the vectors x and y, and set 
kusano 7d535a
       up the start points in  X  and  Y. */
kusano 7d535a
    if (lsame_(trans, "N")) {
kusano 7d535a
	lenx = A->ncol;
kusano 7d535a
	leny = A->nrow;
kusano 7d535a
    } else {
kusano 7d535a
	lenx = A->nrow;
kusano 7d535a
	leny = A->ncol;
kusano 7d535a
    }
kusano 7d535a
    if (incx > 0) kx = 0;
kusano 7d535a
    else kx =  - (lenx - 1) * incx;
kusano 7d535a
    if (incy > 0) ky = 0;
kusano 7d535a
    else ky =  - (leny - 1) * incy;
kusano 7d535a
kusano 7d535a
    /* Start the operations. In this version the elements of A are   
kusano 7d535a
       accessed sequentially with one pass through A. */
kusano 7d535a
    /* First form  y := beta*y. */
kusano 7d535a
    if ( !c_eq(&beta, &comp_one) ) {
kusano 7d535a
	if (incy == 1) {
kusano 7d535a
	    if ( c_eq(&beta, &comp_zero) )
kusano 7d535a
		for (i = 0; i < leny; ++i) y[i] = comp_zero;
kusano 7d535a
	    else
kusano 7d535a
		for (i = 0; i < leny; ++i) 
kusano 7d535a
		  cc_mult(&y[i], &beta, &y[i]);
kusano 7d535a
	} else {
kusano 7d535a
	    iy = ky;
kusano 7d535a
	    if ( c_eq(&beta, &comp_zero) )
kusano 7d535a
		for (i = 0; i < leny; ++i) {
kusano 7d535a
		    y[iy] = comp_zero;
kusano 7d535a
		    iy += incy;
kusano 7d535a
		}
kusano 7d535a
	    else
kusano 7d535a
		for (i = 0; i < leny; ++i) {
kusano 7d535a
		    cc_mult(&y[iy], &beta, &y[iy]);
kusano 7d535a
		    iy += incy;
kusano 7d535a
		}
kusano 7d535a
	}
kusano 7d535a
    }
kusano 7d535a
    
kusano 7d535a
    if ( c_eq(&alpha, &comp_zero) ) return 0;
kusano 7d535a
kusano 7d535a
    if ( notran ) {
kusano 7d535a
	/* Form  y := alpha*A*x + y. */
kusano 7d535a
	jx = kx;
kusano 7d535a
	if (incy == 1) {
kusano 7d535a
	    for (j = 0; j < A->ncol; ++j) {
kusano 7d535a
		if ( !c_eq(&x[jx], &comp_zero) ) {
kusano 7d535a
		    cc_mult(&temp, &alpha, &x[jx]);
kusano 7d535a
		    for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
kusano 7d535a
			irow = Astore->rowind[i];
kusano 7d535a
			cc_mult(&temp1, &temp,  &Aval[i]);
kusano 7d535a
			c_add(&y[irow], &y[irow], &temp1);
kusano 7d535a
		    }
kusano 7d535a
		}
kusano 7d535a
		jx += incx;
kusano 7d535a
	    }
kusano 7d535a
	} else {
kusano 7d535a
	    ABORT("Not implemented.");
kusano 7d535a
	}
kusano 7d535a
    } else {
kusano 7d535a
	/* Form  y := alpha*A'*x + y. */
kusano 7d535a
	jy = ky;
kusano 7d535a
	if (incx == 1) {
kusano 7d535a
	    for (j = 0; j < A->ncol; ++j) {
kusano 7d535a
		temp = comp_zero;
kusano 7d535a
		for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
kusano 7d535a
		    irow = Astore->rowind[i];
kusano 7d535a
		    cc_mult(&temp1, &Aval[i], &x[irow]);
kusano 7d535a
		    c_add(&temp, &temp, &temp1);
kusano 7d535a
		}
kusano 7d535a
		cc_mult(&temp1, &alpha, &temp);
kusano 7d535a
		c_add(&y[jy], &y[jy], &temp1);
kusano 7d535a
		jy += incy;
kusano 7d535a
	    }
kusano 7d535a
	} else {
kusano 7d535a
	    ABORT("Not implemented.");
kusano 7d535a
	}
kusano 7d535a
    }
kusano 7d535a
    return 0;    
kusano 7d535a
} /* sp_cgemv */
kusano 7d535a