|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! @file ssp_blas2.c
|
|
kusano |
7d535a |
* \brief Sparse BLAS 2, using some dense BLAS 2 operations
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* -- SuperLU routine (version 3.0) --
|
|
kusano |
7d535a |
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
|
|
kusano |
7d535a |
* and Lawrence Berkeley National Lab.
|
|
kusano |
7d535a |
* October 15, 2003
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
/*
|
|
kusano |
7d535a |
* File name: ssp_blas2.c
|
|
kusano |
7d535a |
* Purpose: Sparse BLAS 2, using some dense BLAS 2 operations.
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
#include "slu_sdefs.h"
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*
|
|
kusano |
7d535a |
* Function prototypes
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
void susolve(int, int, float*, float*);
|
|
kusano |
7d535a |
void slsolve(int, int, float*, float*);
|
|
kusano |
7d535a |
void smatvec(int, int, int, float*, float*, float*);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! \brief Solves one of the systems of equations A*x = b, or A'*x = b
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* Purpose
|
|
kusano |
7d535a |
* =======
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* sp_strsv() solves one of the systems of equations
|
|
kusano |
7d535a |
* A*x = b, or A'*x = b,
|
|
kusano |
7d535a |
* where b and x are n element vectors and A is a sparse unit , or
|
|
kusano |
7d535a |
* non-unit, upper or lower triangular matrix.
|
|
kusano |
7d535a |
* No test for singularity or near-singularity is included in this
|
|
kusano |
7d535a |
* routine. Such tests must be performed before calling this routine.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* Parameters
|
|
kusano |
7d535a |
* ==========
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* uplo - (input) char*
|
|
kusano |
7d535a |
* On entry, uplo specifies whether the matrix is an upper or
|
|
kusano |
7d535a |
* lower triangular matrix as follows:
|
|
kusano |
7d535a |
* uplo = 'U' or 'u' A is an upper triangular matrix.
|
|
kusano |
7d535a |
* uplo = 'L' or 'l' A is a lower triangular matrix.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* trans - (input) char*
|
|
kusano |
7d535a |
* On entry, trans specifies the equations to be solved as
|
|
kusano |
7d535a |
* follows:
|
|
kusano |
7d535a |
* trans = 'N' or 'n' A*x = b.
|
|
kusano |
7d535a |
* trans = 'T' or 't' A'*x = b.
|
|
kusano |
7d535a |
* trans = 'C' or 'c' A'*x = b.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* diag - (input) char*
|
|
kusano |
7d535a |
* On entry, diag specifies whether or not A is unit
|
|
kusano |
7d535a |
* triangular as follows:
|
|
kusano |
7d535a |
* diag = 'U' or 'u' A is assumed to be unit triangular.
|
|
kusano |
7d535a |
* diag = 'N' or 'n' A is not assumed to be unit
|
|
kusano |
7d535a |
* triangular.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* L - (input) SuperMatrix*
|
|
kusano |
7d535a |
* The factor L from the factorization Pr*A*Pc=L*U. Use
|
|
kusano |
7d535a |
* compressed row subscripts storage for supernodes,
|
|
kusano |
7d535a |
* i.e., L has types: Stype = SC, Dtype = SLU_S, Mtype = TRLU.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* U - (input) SuperMatrix*
|
|
kusano |
7d535a |
* The factor U from the factorization Pr*A*Pc=L*U.
|
|
kusano |
7d535a |
* U has types: Stype = NC, Dtype = SLU_S, Mtype = TRU.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* x - (input/output) float*
|
|
kusano |
7d535a |
* Before entry, the incremented array X must contain the n
|
|
kusano |
7d535a |
* element right-hand side vector b. On exit, X is overwritten
|
|
kusano |
7d535a |
* with the solution vector x.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* info - (output) int*
|
|
kusano |
7d535a |
* If *info = -i, the i-th argument had an illegal value.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
int
|
|
kusano |
7d535a |
sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
|
|
kusano |
7d535a |
SuperMatrix *U, float *x, SuperLUStat_t *stat, int *info)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
#ifdef _CRAY
|
|
kusano |
7d535a |
_fcd ftcs1 = _cptofcd("L", strlen("L")),
|
|
kusano |
7d535a |
ftcs2 = _cptofcd("N", strlen("N")),
|
|
kusano |
7d535a |
ftcs3 = _cptofcd("U", strlen("U"));
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
SCformat *Lstore;
|
|
kusano |
7d535a |
NCformat *Ustore;
|
|
kusano |
7d535a |
float *Lval, *Uval;
|
|
kusano |
7d535a |
int incx = 1, incy = 1;
|
|
kusano |
7d535a |
float alpha = 1.0, beta = 1.0;
|
|
kusano |
7d535a |
int nrow;
|
|
kusano |
7d535a |
int fsupc, nsupr, nsupc, luptr, istart, irow;
|
|
kusano |
7d535a |
int i, k, iptr, jcol;
|
|
kusano |
7d535a |
float *work;
|
|
kusano |
7d535a |
flops_t solve_ops;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Test the input parameters */
|
|
kusano |
7d535a |
*info = 0;
|
|
kusano |
7d535a |
if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
|
|
kusano |
7d535a |
else if ( !lsame_(trans, "N") && !lsame_(trans, "T") &&
|
|
kusano |
7d535a |
!lsame_(trans, "C")) *info = -2;
|
|
kusano |
7d535a |
else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
|
|
kusano |
7d535a |
else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
|
|
kusano |
7d535a |
else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
|
|
kusano |
7d535a |
if ( *info ) {
|
|
kusano |
7d535a |
i = -(*info);
|
|
kusano |
7d535a |
xerbla_("sp_strsv", &i);
|
|
kusano |
7d535a |
return 0;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
Lstore = L->Store;
|
|
kusano |
7d535a |
Lval = Lstore->nzval;
|
|
kusano |
7d535a |
Ustore = U->Store;
|
|
kusano |
7d535a |
Uval = Ustore->nzval;
|
|
kusano |
7d535a |
solve_ops = 0;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( !(work = floatCalloc(L->nrow)) )
|
|
kusano |
7d535a |
ABORT("Malloc fails for work in sp_strsv().");
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( lsame_(trans, "N") ) { /* Form x := inv(A)*x. */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( lsame_(uplo, "L") ) {
|
|
kusano |
7d535a |
/* Form x := inv(L)*x */
|
|
kusano |
7d535a |
if ( L->nrow == 0 ) return 0; /* Quick return */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
for (k = 0; k <= Lstore->nsuper; k++) {
|
|
kusano |
7d535a |
fsupc = L_FST_SUPC(k);
|
|
kusano |
7d535a |
istart = L_SUB_START(fsupc);
|
|
kusano |
7d535a |
nsupr = L_SUB_START(fsupc+1) - istart;
|
|
kusano |
7d535a |
nsupc = L_FST_SUPC(k+1) - fsupc;
|
|
kusano |
7d535a |
luptr = L_NZ_START(fsupc);
|
|
kusano |
7d535a |
nrow = nsupr - nsupc;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
solve_ops += nsupc * (nsupc - 1);
|
|
kusano |
7d535a |
solve_ops += 2 * nrow * nsupc;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( nsupc == 1 ) {
|
|
kusano |
7d535a |
for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
|
|
kusano |
7d535a |
irow = L_SUB(iptr);
|
|
kusano |
7d535a |
++luptr;
|
|
kusano |
7d535a |
x[irow] -= x[fsupc] * Lval[luptr];
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
#ifdef USE_VENDOR_BLAS
|
|
kusano |
7d535a |
#ifdef _CRAY
|
|
kusano |
7d535a |
STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
|
|
kusano |
7d535a |
&x[fsupc], &incx);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
SGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
|
|
kusano |
7d535a |
&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
|
|
kusano |
7d535a |
#else
|
|
kusano |
7d535a |
strsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
|
|
kusano |
7d535a |
&x[fsupc], &incx);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
sgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
|
|
kusano |
7d535a |
&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
#else
|
|
kusano |
7d535a |
slsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
smatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
|
|
kusano |
7d535a |
&x[fsupc], &work[0] );
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
iptr = istart + nsupc;
|
|
kusano |
7d535a |
for (i = 0; i < nrow; ++i, ++iptr) {
|
|
kusano |
7d535a |
irow = L_SUB(iptr);
|
|
kusano |
7d535a |
x[irow] -= work[i]; /* Scatter */
|
|
kusano |
7d535a |
work[i] = 0.0;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
} /* for k ... */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
/* Form x := inv(U)*x */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( U->nrow == 0 ) return 0; /* Quick return */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
for (k = Lstore->nsuper; k >= 0; k--) {
|
|
kusano |
7d535a |
fsupc = L_FST_SUPC(k);
|
|
kusano |
7d535a |
nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
|
|
kusano |
7d535a |
nsupc = L_FST_SUPC(k+1) - fsupc;
|
|
kusano |
7d535a |
luptr = L_NZ_START(fsupc);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
solve_ops += nsupc * (nsupc + 1);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( nsupc == 1 ) {
|
|
kusano |
7d535a |
x[fsupc] /= Lval[luptr];
|
|
kusano |
7d535a |
for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
|
|
kusano |
7d535a |
irow = U_SUB(i);
|
|
kusano |
7d535a |
x[irow] -= x[fsupc] * Uval[i];
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
#ifdef USE_VENDOR_BLAS
|
|
kusano |
7d535a |
#ifdef _CRAY
|
|
kusano |
7d535a |
STRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
|
|
kusano |
7d535a |
&x[fsupc], &incx);
|
|
kusano |
7d535a |
#else
|
|
kusano |
7d535a |
strsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
|
|
kusano |
7d535a |
&x[fsupc], &incx);
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
#else
|
|
kusano |
7d535a |
susolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
|
|
kusano |
7d535a |
solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
|
|
kusano |
7d535a |
for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1);
|
|
kusano |
7d535a |
i++) {
|
|
kusano |
7d535a |
irow = U_SUB(i);
|
|
kusano |
7d535a |
x[irow] -= x[jcol] * Uval[i];
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
} /* for k ... */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
} else { /* Form x := inv(A')*x */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( lsame_(uplo, "L") ) {
|
|
kusano |
7d535a |
/* Form x := inv(L')*x */
|
|
kusano |
7d535a |
if ( L->nrow == 0 ) return 0; /* Quick return */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
for (k = Lstore->nsuper; k >= 0; --k) {
|
|
kusano |
7d535a |
fsupc = L_FST_SUPC(k);
|
|
kusano |
7d535a |
istart = L_SUB_START(fsupc);
|
|
kusano |
7d535a |
nsupr = L_SUB_START(fsupc+1) - istart;
|
|
kusano |
7d535a |
nsupc = L_FST_SUPC(k+1) - fsupc;
|
|
kusano |
7d535a |
luptr = L_NZ_START(fsupc);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
solve_ops += 2 * (nsupr - nsupc) * nsupc;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
|
|
kusano |
7d535a |
iptr = istart + nsupc;
|
|
kusano |
7d535a |
for (i = L_NZ_START(jcol) + nsupc;
|
|
kusano |
7d535a |
i < L_NZ_START(jcol+1); i++) {
|
|
kusano |
7d535a |
irow = L_SUB(iptr);
|
|
kusano |
7d535a |
x[jcol] -= x[irow] * Lval[i];
|
|
kusano |
7d535a |
iptr++;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( nsupc > 1 ) {
|
|
kusano |
7d535a |
solve_ops += nsupc * (nsupc - 1);
|
|
kusano |
7d535a |
#ifdef _CRAY
|
|
kusano |
7d535a |
ftcs1 = _cptofcd("L", strlen("L"));
|
|
kusano |
7d535a |
ftcs2 = _cptofcd("T", strlen("T"));
|
|
kusano |
7d535a |
ftcs3 = _cptofcd("U", strlen("U"));
|
|
kusano |
7d535a |
STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
|
|
kusano |
7d535a |
&x[fsupc], &incx);
|
|
kusano |
7d535a |
#else
|
|
kusano |
7d535a |
strsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
|
|
kusano |
7d535a |
&x[fsupc], &incx);
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
/* Form x := inv(U')*x */
|
|
kusano |
7d535a |
if ( U->nrow == 0 ) return 0; /* Quick return */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
for (k = 0; k <= Lstore->nsuper; k++) {
|
|
kusano |
7d535a |
fsupc = L_FST_SUPC(k);
|
|
kusano |
7d535a |
nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
|
|
kusano |
7d535a |
nsupc = L_FST_SUPC(k+1) - fsupc;
|
|
kusano |
7d535a |
luptr = L_NZ_START(fsupc);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
|
|
kusano |
7d535a |
solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
|
|
kusano |
7d535a |
for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
|
|
kusano |
7d535a |
irow = U_SUB(i);
|
|
kusano |
7d535a |
x[jcol] -= x[irow] * Uval[i];
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
solve_ops += nsupc * (nsupc + 1);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( nsupc == 1 ) {
|
|
kusano |
7d535a |
x[fsupc] /= Lval[luptr];
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
#ifdef _CRAY
|
|
kusano |
7d535a |
ftcs1 = _cptofcd("U", strlen("U"));
|
|
kusano |
7d535a |
ftcs2 = _cptofcd("T", strlen("T"));
|
|
kusano |
7d535a |
ftcs3 = _cptofcd("N", strlen("N"));
|
|
kusano |
7d535a |
STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
|
|
kusano |
7d535a |
&x[fsupc], &incx);
|
|
kusano |
7d535a |
#else
|
|
kusano |
7d535a |
strsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
|
|
kusano |
7d535a |
&x[fsupc], &incx);
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
} /* for k ... */
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
stat->ops[SOLVE] += solve_ops;
|
|
kusano |
7d535a |
SUPERLU_FREE(work);
|
|
kusano |
7d535a |
return 0;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! \brief Performs one of the matrix-vector operations y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y,
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* Purpose
|
|
kusano |
7d535a |
* =======
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* sp_sgemv() performs one of the matrix-vector operations
|
|
kusano |
7d535a |
* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y,
|
|
kusano |
7d535a |
* where alpha and beta are scalars, x and y are vectors and A is a
|
|
kusano |
7d535a |
* sparse A->nrow by A->ncol matrix.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* Parameters
|
|
kusano |
7d535a |
* ==========
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* TRANS - (input) char*
|
|
kusano |
7d535a |
* On entry, TRANS specifies the operation to be performed as
|
|
kusano |
7d535a |
* follows:
|
|
kusano |
7d535a |
* TRANS = 'N' or 'n' y := alpha*A*x + beta*y.
|
|
kusano |
7d535a |
* TRANS = 'T' or 't' y := alpha*A'*x + beta*y.
|
|
kusano |
7d535a |
* TRANS = 'C' or 'c' y := alpha*A'*x + beta*y.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* ALPHA - (input) float
|
|
kusano |
7d535a |
* On entry, ALPHA specifies the scalar alpha.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* A - (input) SuperMatrix*
|
|
kusano |
7d535a |
* Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
|
|
kusano |
7d535a |
* Currently, the type of A can be:
|
|
kusano |
7d535a |
* Stype = NC or NCP; Dtype = SLU_S; Mtype = GE.
|
|
kusano |
7d535a |
* In the future, more general A can be handled.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* X - (input) float*, array of DIMENSION at least
|
|
kusano |
7d535a |
* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
|
|
kusano |
7d535a |
* and at least
|
|
kusano |
7d535a |
* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
|
|
kusano |
7d535a |
* Before entry, the incremented array X must contain the
|
|
kusano |
7d535a |
* vector x.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* INCX - (input) int
|
|
kusano |
7d535a |
* On entry, INCX specifies the increment for the elements of
|
|
kusano |
7d535a |
* X. INCX must not be zero.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* BETA - (input) float
|
|
kusano |
7d535a |
* On entry, BETA specifies the scalar beta. When BETA is
|
|
kusano |
7d535a |
* supplied as zero then Y need not be set on input.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* Y - (output) float*, array of DIMENSION at least
|
|
kusano |
7d535a |
* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
|
|
kusano |
7d535a |
* and at least
|
|
kusano |
7d535a |
* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
|
|
kusano |
7d535a |
* Before entry with BETA non-zero, the incremented array Y
|
|
kusano |
7d535a |
* must contain the vector y. On exit, Y is overwritten by the
|
|
kusano |
7d535a |
* updated vector y.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* INCY - (input) int
|
|
kusano |
7d535a |
* On entry, INCY specifies the increment for the elements of
|
|
kusano |
7d535a |
* Y. INCY must not be zero.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* ==== Sparse Level 2 Blas routine.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
int
|
|
kusano |
7d535a |
sp_sgemv(char *trans, float alpha, SuperMatrix *A, float *x,
|
|
kusano |
7d535a |
int incx, float beta, float *y, int incy)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
/* Local variables */
|
|
kusano |
7d535a |
NCformat *Astore;
|
|
kusano |
7d535a |
float *Aval;
|
|
kusano |
7d535a |
int info;
|
|
kusano |
7d535a |
float temp;
|
|
kusano |
7d535a |
int lenx, leny, i, j, irow;
|
|
kusano |
7d535a |
int iy, jx, jy, kx, ky;
|
|
kusano |
7d535a |
int notran;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
notran = lsame_(trans, "N");
|
|
kusano |
7d535a |
Astore = A->Store;
|
|
kusano |
7d535a |
Aval = Astore->nzval;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Test the input parameters */
|
|
kusano |
7d535a |
info = 0;
|
|
kusano |
7d535a |
if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;
|
|
kusano |
7d535a |
else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
|
|
kusano |
7d535a |
else if (incx == 0) info = 5;
|
|
kusano |
7d535a |
else if (incy == 0) info = 8;
|
|
kusano |
7d535a |
if (info != 0) {
|
|
kusano |
7d535a |
xerbla_("sp_sgemv ", &info);
|
|
kusano |
7d535a |
return 0;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Quick return if possible. */
|
|
kusano |
7d535a |
if (A->nrow == 0 || A->ncol == 0 || (alpha == 0. && beta == 1.))
|
|
kusano |
7d535a |
return 0;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Set LENX and LENY, the lengths of the vectors x and y, and set
|
|
kusano |
7d535a |
up the start points in X and Y. */
|
|
kusano |
7d535a |
if (lsame_(trans, "N")) {
|
|
kusano |
7d535a |
lenx = A->ncol;
|
|
kusano |
7d535a |
leny = A->nrow;
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
lenx = A->nrow;
|
|
kusano |
7d535a |
leny = A->ncol;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
if (incx > 0) kx = 0;
|
|
kusano |
7d535a |
else kx = - (lenx - 1) * incx;
|
|
kusano |
7d535a |
if (incy > 0) ky = 0;
|
|
kusano |
7d535a |
else ky = - (leny - 1) * incy;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Start the operations. In this version the elements of A are
|
|
kusano |
7d535a |
accessed sequentially with one pass through A. */
|
|
kusano |
7d535a |
/* First form y := beta*y. */
|
|
kusano |
7d535a |
if (beta != 1.) {
|
|
kusano |
7d535a |
if (incy == 1) {
|
|
kusano |
7d535a |
if (beta == 0.)
|
|
kusano |
7d535a |
for (i = 0; i < leny; ++i) y[i] = 0.;
|
|
kusano |
7d535a |
else
|
|
kusano |
7d535a |
for (i = 0; i < leny; ++i) y[i] = beta * y[i];
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
iy = ky;
|
|
kusano |
7d535a |
if (beta == 0.)
|
|
kusano |
7d535a |
for (i = 0; i < leny; ++i) {
|
|
kusano |
7d535a |
y[iy] = 0.;
|
|
kusano |
7d535a |
iy += incy;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
else
|
|
kusano |
7d535a |
for (i = 0; i < leny; ++i) {
|
|
kusano |
7d535a |
y[iy] = beta * y[iy];
|
|
kusano |
7d535a |
iy += incy;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if (alpha == 0.) return 0;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( notran ) {
|
|
kusano |
7d535a |
/* Form y := alpha*A*x + y. */
|
|
kusano |
7d535a |
jx = kx;
|
|
kusano |
7d535a |
if (incy == 1) {
|
|
kusano |
7d535a |
for (j = 0; j < A->ncol; ++j) {
|
|
kusano |
7d535a |
if (x[jx] != 0.) {
|
|
kusano |
7d535a |
temp = alpha * x[jx];
|
|
kusano |
7d535a |
for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
|
|
kusano |
7d535a |
irow = Astore->rowind[i];
|
|
kusano |
7d535a |
y[irow] += temp * Aval[i];
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
jx += incx;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
ABORT("Not implemented.");
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
/* Form y := alpha*A'*x + y. */
|
|
kusano |
7d535a |
jy = ky;
|
|
kusano |
7d535a |
if (incx == 1) {
|
|
kusano |
7d535a |
for (j = 0; j < A->ncol; ++j) {
|
|
kusano |
7d535a |
temp = 0.;
|
|
kusano |
7d535a |
for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
|
|
kusano |
7d535a |
irow = Astore->rowind[i];
|
|
kusano |
7d535a |
temp += Aval[i] * x[irow];
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
y[jy] += alpha * temp;
|
|
kusano |
7d535a |
jy += incy;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
} else {
|
|
kusano |
7d535a |
ABORT("Not implemented.");
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
return 0;
|
|
kusano |
7d535a |
} /* sp_sgemv */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|