|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! @file zsnode_bmod.c
|
|
kusano |
7d535a |
* \brief Performs numeric block updates within the relaxed snode.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* -- SuperLU routine (version 3.0) --
|
|
kusano |
7d535a |
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
|
|
kusano |
7d535a |
* and Lawrence Berkeley National Lab.
|
|
kusano |
7d535a |
* October 15, 2003
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* Copyright (c) 1994 by Xerox Corporation. All rights reserved.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
|
|
kusano |
7d535a |
* EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
* Permission is hereby granted to use or copy this program for any
|
|
kusano |
7d535a |
* purpose, provided the above notices are retained on all copies.
|
|
kusano |
7d535a |
* Permission to modify the code and to distribute modified code is
|
|
kusano |
7d535a |
* granted, provided the above notices are retained, and a notice that
|
|
kusano |
7d535a |
* the code was modified is included with the above copyright notice.
|
|
kusano |
7d535a |
*
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
#include "slu_zdefs.h"
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*! \brief Performs numeric block updates within the relaxed snode.
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
int
|
|
kusano |
7d535a |
zsnode_bmod (
|
|
kusano |
7d535a |
const int jcol, /* in */
|
|
kusano |
7d535a |
const int jsupno, /* in */
|
|
kusano |
7d535a |
const int fsupc, /* in */
|
|
kusano |
7d535a |
doublecomplex *dense, /* in */
|
|
kusano |
7d535a |
doublecomplex *tempv, /* working array */
|
|
kusano |
7d535a |
GlobalLU_t *Glu, /* modified */
|
|
kusano |
7d535a |
SuperLUStat_t *stat /* output */
|
|
kusano |
7d535a |
)
|
|
kusano |
7d535a |
{
|
|
kusano |
7d535a |
#ifdef USE_VENDOR_BLAS
|
|
kusano |
7d535a |
#ifdef _CRAY
|
|
kusano |
7d535a |
_fcd ftcs1 = _cptofcd("L", strlen("L")),
|
|
kusano |
7d535a |
ftcs2 = _cptofcd("N", strlen("N")),
|
|
kusano |
7d535a |
ftcs3 = _cptofcd("U", strlen("U"));
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
int incx = 1, incy = 1;
|
|
kusano |
7d535a |
doublecomplex alpha = {-1.0, 0.0}, beta = {1.0, 0.0};
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
doublecomplex comp_zero = {0.0, 0.0};
|
|
kusano |
7d535a |
int luptr, nsupc, nsupr, nrow;
|
|
kusano |
7d535a |
int isub, irow, i, iptr;
|
|
kusano |
7d535a |
register int ufirst, nextlu;
|
|
kusano |
7d535a |
int *lsub, *xlsub;
|
|
kusano |
7d535a |
doublecomplex *lusup;
|
|
kusano |
7d535a |
int *xlusup;
|
|
kusano |
7d535a |
flops_t *ops = stat->ops;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
lsub = Glu->lsub;
|
|
kusano |
7d535a |
xlsub = Glu->xlsub;
|
|
kusano |
7d535a |
lusup = Glu->lusup;
|
|
kusano |
7d535a |
xlusup = Glu->xlusup;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
nextlu = xlusup[jcol];
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*
|
|
kusano |
7d535a |
* Process the supernodal portion of L\U[*,j]
|
|
kusano |
7d535a |
*/
|
|
kusano |
7d535a |
for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
|
|
kusano |
7d535a |
irow = lsub[isub];
|
|
kusano |
7d535a |
lusup[nextlu] = dense[irow];
|
|
kusano |
7d535a |
dense[irow] = comp_zero;
|
|
kusano |
7d535a |
++nextlu;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
xlusup[jcol + 1] = nextlu; /* Initialize xlusup for next column */
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
if ( fsupc < jcol ) {
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
luptr = xlusup[fsupc];
|
|
kusano |
7d535a |
nsupr = xlsub[fsupc+1] - xlsub[fsupc];
|
|
kusano |
7d535a |
nsupc = jcol - fsupc; /* Excluding jcol */
|
|
kusano |
7d535a |
ufirst = xlusup[jcol]; /* Points to the beginning of column
|
|
kusano |
7d535a |
jcol in supernode L\U(jsupno). */
|
|
kusano |
7d535a |
nrow = nsupr - nsupc;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
ops[TRSV] += 4 * nsupc * (nsupc - 1);
|
|
kusano |
7d535a |
ops[GEMV] += 8 * nrow * nsupc;
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
#ifdef USE_VENDOR_BLAS
|
|
kusano |
7d535a |
#ifdef _CRAY
|
|
kusano |
7d535a |
CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], &nsupr,
|
|
kusano |
7d535a |
&lusup[ufirst], &incx );
|
|
kusano |
7d535a |
CGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
|
|
kusano |
7d535a |
&lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
|
|
kusano |
7d535a |
#else
|
|
kusano |
7d535a |
ztrsv_( "L", "N", "U", &nsupc, &lusup[luptr], &nsupr,
|
|
kusano |
7d535a |
&lusup[ufirst], &incx );
|
|
kusano |
7d535a |
zgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
|
|
kusano |
7d535a |
&lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
#else
|
|
kusano |
7d535a |
zlsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
|
|
kusano |
7d535a |
zmatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
|
|
kusano |
7d535a |
&lusup[ufirst], &tempv[0] );
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/* Scatter tempv[*] into lusup[*] */
|
|
kusano |
7d535a |
iptr = ufirst + nsupc;
|
|
kusano |
7d535a |
for (i = 0; i < nrow; i++) {
|
|
kusano |
7d535a |
z_sub(&lusup[iptr], &lusup[iptr], &tempv[i]);
|
|
kusano |
7d535a |
++iptr;
|
|
kusano |
7d535a |
tempv[i] = comp_zero;
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
#endif
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
return 0;
|
|
kusano |
7d535a |
}
|