|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! @file cmyblas2.c
|
|
kusano |
7d535a |
* \brief Level 2 Blas operations
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* -- SuperLU routine (version 2.0) --
|
|
kusano |
7d535a |
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
|
|
kusano |
7d535a |
* and Lawrence Berkeley National Lab.
|
|
kusano |
7d535a |
* November 15, 1997
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* Purpose:
|
|
kusano |
7d535a |
* Level 2 BLAS operations: solves and matvec, written in C.
|
|
kusano |
7d535a |
* Note:
|
|
kusano |
7d535a |
* This is only used when the system lacks an efficient BLAS library.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
/*
|
|
kusano |
7d535a |
* File name: cmyblas2.c
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
#include "slu_scomplex.h"
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! \brief Solves a dense UNIT lower triangular system
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* The unit lower
|
|
kusano |
7d535a |
* triangular matrix is stored in a 2D array M(1:nrow,1:ncol).
|
|
kusano |
7d535a |
* The solution will be returned in the rhs vector.
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
void clsolve ( int ldm, int ncol, complex *M, complex *rhs )
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
int k;
|
|
kusano |
7d535a |
complex x0, x1, x2, x3, temp;
|
|
kusano |
7d535a |
complex *M0;
|
|
kusano |
7d535a |
complex *Mki0, *Mki1, *Mki2, *Mki3;
|
|
kusano |
7d535a |
register int firstcol = 0;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
M0 = &M[0];
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
while ( firstcol < ncol - 3 ) { /* Do 4 columns */
|
|
kusano |
7d535a |
Mki0 = M0 + 1;
|
|
kusano |
7d535a |
Mki1 = Mki0 + ldm + 1;
|
|
kusano |
7d535a |
Mki2 = Mki1 + ldm + 1;
|
|
kusano |
7d535a |
Mki3 = Mki2 + ldm + 1;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
x0 = rhs[firstcol];
|
|
kusano |
7d535a |
cc_mult(&temp, &x0, Mki0); Mki0++;
|
|
kusano |
7d535a |
c_sub(&x1, &rhs[firstcol+1], &temp);
|
|
kusano |
7d535a |
cc_mult(&temp, &x0, Mki0); Mki0++;
|
|
kusano |
7d535a |
c_sub(&x2, &rhs[firstcol+2], &temp);
|
|
kusano |
7d535a |
cc_mult(&temp, &x1, Mki1); Mki1++;
|
|
kusano |
7d535a |
c_sub(&x2, &x2, &temp);
|
|
kusano |
7d535a |
cc_mult(&temp, &x0, Mki0); Mki0++;
|
|
kusano |
7d535a |
c_sub(&x3, &rhs[firstcol+3], &temp);
|
|
kusano |
7d535a |
cc_mult(&temp, &x1, Mki1); Mki1++;
|
|
kusano |
7d535a |
c_sub(&x3, &x3, &temp);
|
|
kusano |
7d535a |
cc_mult(&temp, &x2, Mki2); Mki2++;
|
|
kusano |
7d535a |
c_sub(&x3, &x3, &temp);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
rhs[++firstcol] = x1;
|
|
kusano |
7d535a |
rhs[++firstcol] = x2;
|
|
kusano |
7d535a |
rhs[++firstcol] = x3;
|
|
kusano |
7d535a |
++firstcol;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
for (k = firstcol; k < ncol; k++) {
|
|
kusano |
7d535a |
cc_mult(&temp, &x0, Mki0); Mki0++;
|
|
kusano |
7d535a |
c_sub(&rhs[k], &rhs[k], &temp);
|
|
kusano |
7d535a |
cc_mult(&temp, &x1, Mki1); Mki1++;
|
|
kusano |
7d535a |
c_sub(&rhs[k], &rhs[k], &temp);
|
|
kusano |
7d535a |
cc_mult(&temp, &x2, Mki2); Mki2++;
|
|
kusano |
7d535a |
c_sub(&rhs[k], &rhs[k], &temp);
|
|
kusano |
7d535a |
cc_mult(&temp, &x3, Mki3); Mki3++;
|
|
kusano |
7d535a |
c_sub(&rhs[k], &rhs[k], &temp);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
M0 += 4 * ldm + 4;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( firstcol < ncol - 1 ) { /* Do 2 columns */
|
|
kusano |
7d535a |
Mki0 = M0 + 1;
|
|
kusano |
7d535a |
Mki1 = Mki0 + ldm + 1;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
x0 = rhs[firstcol];
|
|
kusano |
7d535a |
cc_mult(&temp, &x0, Mki0); Mki0++;
|
|
kusano |
7d535a |
c_sub(&x1, &rhs[firstcol+1], &temp);
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
rhs[++firstcol] = x1;
|
|
kusano |
7d535a |
++firstcol;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
for (k = firstcol; k < ncol; k++) {
|
|
kusano |
7d535a |
cc_mult(&temp, &x0, Mki0); Mki0++;
|
|
kusano |
7d535a |
c_sub(&rhs[k], &rhs[k], &temp);
|
|
kusano |
7d535a |
cc_mult(&temp, &x1, Mki1); Mki1++;
|
|
kusano |
7d535a |
c_sub(&rhs[k], &rhs[k], &temp);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! \brief Solves a dense upper triangular system.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* The upper triangular matrix is
|
|
kusano |
7d535a |
* stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
|
|
kusano |
7d535a |
* in the rhs vector.
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
void
|
|
kusano |
7d535a |
cusolve ( ldm, ncol, M, rhs )
|
|
kusano |
7d535a |
int ldm; /* in */
|
|
kusano |
7d535a |
int ncol; /* in */
|
|
kusano |
7d535a |
complex *M; /* in */
|
|
kusano |
7d535a |
complex *rhs; /* modified */
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
complex xj, temp;
|
|
kusano |
7d535a |
int jcol, j, irow;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
jcol = ncol - 1;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
for (j = 0; j < ncol; j++) {
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
c_div(&xj, &rhs[jcol], &M[jcol + jcol*ldm]); /* M(jcol, jcol) */
|
|
kusano |
7d535a |
rhs[jcol] = xj;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
for (irow = 0; irow < jcol; irow++) {
|
|
kusano |
7d535a |
cc_mult(&temp, &xj, &M[irow+jcol*ldm]); /* M(irow, jcol) */
|
|
kusano |
7d535a |
c_sub(&rhs[irow], &rhs[irow], &temp);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
jcol--;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! \brief Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
void cmatvec ( ldm, nrow, ncol, M, vec, Mxvec )
|
|
kusano |
7d535a |
int ldm; /* in -- leading dimension of M */
|
|
kusano |
7d535a |
int nrow; /* in */
|
|
kusano |
7d535a |
int ncol; /* in */
|
|
kusano |
7d535a |
complex *M; /* in */
|
|
kusano |
7d535a |
complex *vec; /* in */
|
|
kusano |
7d535a |
complex *Mxvec; /* in/out */
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
complex vi0, vi1, vi2, vi3;
|
|
kusano |
7d535a |
complex *M0, temp;
|
|
kusano |
7d535a |
complex *Mki0, *Mki1, *Mki2, *Mki3;
|
|
kusano |
7d535a |
register int firstcol = 0;
|
|
kusano |
7d535a |
int k;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
M0 = &M[0];
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
while ( firstcol < ncol - 3 ) { /* Do 4 columns */
|
|
kusano |
7d535a |
Mki0 = M0;
|
|
kusano |
7d535a |
Mki1 = Mki0 + ldm;
|
|
kusano |
7d535a |
Mki2 = Mki1 + ldm;
|
|
kusano |
7d535a |
Mki3 = Mki2 + ldm;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
vi0 = vec[firstcol++];
|
|
kusano |
7d535a |
vi1 = vec[firstcol++];
|
|
kusano |
7d535a |
vi2 = vec[firstcol++];
|
|
kusano |
7d535a |
vi3 = vec[firstcol++];
|
|
kusano |
7d535a |
for (k = 0; k < nrow; k++) {
|
|
kusano |
7d535a |
cc_mult(&temp, &vi0, Mki0); Mki0++;
|
|
kusano |
7d535a |
c_add(&Mxvec[k], &Mxvec[k], &temp);
|
|
kusano |
7d535a |
cc_mult(&temp, &vi1, Mki1); Mki1++;
|
|
kusano |
7d535a |
c_add(&Mxvec[k], &Mxvec[k], &temp);
|
|
kusano |
7d535a |
cc_mult(&temp, &vi2, Mki2); Mki2++;
|
|
kusano |
7d535a |
c_add(&Mxvec[k], &Mxvec[k], &temp);
|
|
kusano |
7d535a |
cc_mult(&temp, &vi3, Mki3); Mki3++;
|
|
kusano |
7d535a |
c_add(&Mxvec[k], &Mxvec[k], &temp);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
M0 += 4 * ldm;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
while ( firstcol < ncol ) { /* Do 1 column */
|
|
kusano |
7d535a |
Mki0 = M0;
|
|
kusano |
7d535a |
vi0 = vec[firstcol++];
|
|
kusano |
7d535a |
for (k = 0; k < nrow; k++) {
|
|
kusano |
7d535a |
cc_mult(&temp, &vi0, Mki0); Mki0++;
|
|
kusano |
7d535a |
c_add(&Mxvec[k], &Mxvec[k], &temp);
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
M0 += ldm;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|