kusano 7d535a
kusano 7d535a
/*! @file ssnode_bmod.c
kusano 7d535a
 * \brief Performs numeric block updates within the relaxed snode.
kusano 7d535a
 *
kusano 7d535a
 * 
kusano 7d535a
 * -- SuperLU routine (version 3.0) --
kusano 7d535a
 * Univ. of California Berkeley, Xerox Palo Alto Research Center,
kusano 7d535a
 * and Lawrence Berkeley National Lab.
kusano 7d535a
 * October 15, 2003
kusano 7d535a
 *
kusano 7d535a
 * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
kusano 7d535a
 *
kusano 7d535a
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
kusano 7d535a
 * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
kusano 7d535a
 * 
kusano 7d535a
 * Permission is hereby granted to use or copy this program for any
kusano 7d535a
 * purpose, provided the above notices are retained on all copies.
kusano 7d535a
 * Permission to modify the code and to distribute modified code is
kusano 7d535a
 * granted, provided the above notices are retained, and a notice that
kusano 7d535a
 * the code was modified is included with the above copyright notice.
kusano 7d535a
 * 
kusano 7d535a
 */
kusano 7d535a
kusano 7d535a
kusano 7d535a
#include "slu_sdefs.h"
kusano 7d535a
kusano 7d535a
kusano 7d535a
/*! \brief Performs numeric block updates within the relaxed snode. 
kusano 7d535a
 */
kusano 7d535a
int
kusano 7d535a
ssnode_bmod (
kusano 7d535a
	    const int  jcol,	  /* in */
kusano 7d535a
	    const int  jsupno,    /* in */
kusano 7d535a
	    const int  fsupc,     /* in */
kusano 7d535a
	    float     *dense,    /* in */
kusano 7d535a
	    float     *tempv,    /* working array */
kusano 7d535a
	    GlobalLU_t *Glu,      /* modified */
kusano 7d535a
	    SuperLUStat_t *stat   /* output */
kusano 7d535a
	    )
kusano 7d535a
{
kusano 7d535a
#ifdef USE_VENDOR_BLAS
kusano 7d535a
#ifdef _CRAY
kusano 7d535a
    _fcd ftcs1 = _cptofcd("L", strlen("L")),
kusano 7d535a
	 ftcs2 = _cptofcd("N", strlen("N")),
kusano 7d535a
	 ftcs3 = _cptofcd("U", strlen("U"));
kusano 7d535a
#endif
kusano 7d535a
    int            incx = 1, incy = 1;
kusano 7d535a
    float         alpha = -1.0, beta = 1.0;
kusano 7d535a
#endif
kusano 7d535a
kusano 7d535a
    int            luptr, nsupc, nsupr, nrow;
kusano 7d535a
    int            isub, irow, i, iptr; 
kusano 7d535a
    register int   ufirst, nextlu;
kusano 7d535a
    int            *lsub, *xlsub;
kusano 7d535a
    float         *lusup;
kusano 7d535a
    int            *xlusup;
kusano 7d535a
    flops_t *ops = stat->ops;
kusano 7d535a
kusano 7d535a
    lsub    = Glu->lsub;
kusano 7d535a
    xlsub   = Glu->xlsub;
kusano 7d535a
    lusup   = Glu->lusup;
kusano 7d535a
    xlusup  = Glu->xlusup;
kusano 7d535a
kusano 7d535a
    nextlu = xlusup[jcol];
kusano 7d535a
    
kusano 7d535a
    /*
kusano 7d535a
     *	Process the supernodal portion of L\U[*,j]
kusano 7d535a
     */
kusano 7d535a
    for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
kusano 7d535a
  	irow = lsub[isub];
kusano 7d535a
	lusup[nextlu] = dense[irow];
kusano 7d535a
	dense[irow] = 0;
kusano 7d535a
	++nextlu;
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    xlusup[jcol + 1] = nextlu;	/* Initialize xlusup for next column */
kusano 7d535a
    
kusano 7d535a
    if ( fsupc < jcol ) {
kusano 7d535a
kusano 7d535a
	luptr = xlusup[fsupc];
kusano 7d535a
	nsupr = xlsub[fsupc+1] - xlsub[fsupc];
kusano 7d535a
	nsupc = jcol - fsupc;	/* Excluding jcol */
kusano 7d535a
	ufirst = xlusup[jcol];	/* Points to the beginning of column
kusano 7d535a
				   jcol in supernode L\U(jsupno). */
kusano 7d535a
	nrow = nsupr - nsupc;
kusano 7d535a
kusano 7d535a
	ops[TRSV] += nsupc * (nsupc - 1);
kusano 7d535a
	ops[GEMV] += 2 * nrow * nsupc;
kusano 7d535a
kusano 7d535a
#ifdef USE_VENDOR_BLAS
kusano 7d535a
#ifdef _CRAY
kusano 7d535a
	STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], &nsupr, 
kusano 7d535a
	      &lusup[ufirst], &incx );
kusano 7d535a
	SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
kusano 7d535a
		&lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
kusano 7d535a
#else
kusano 7d535a
	strsv_( "L", "N", "U", &nsupc, &lusup[luptr], &nsupr, 
kusano 7d535a
	      &lusup[ufirst], &incx );
kusano 7d535a
	sgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
kusano 7d535a
		&lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
kusano 7d535a
#endif
kusano 7d535a
#else
kusano 7d535a
	slsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
kusano 7d535a
	smatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc], 
kusano 7d535a
			&lusup[ufirst], &tempv[0] );
kusano 7d535a
kusano 7d535a
        /* Scatter tempv[*] into lusup[*] */
kusano 7d535a
	iptr = ufirst + nsupc;
kusano 7d535a
	for (i = 0; i < nrow; i++) {
kusano 7d535a
	    lusup[iptr++] -= tempv[i];
kusano 7d535a
	    tempv[i] = 0.0;
kusano 7d535a
	}
kusano 7d535a
#endif
kusano 7d535a
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    return 0;
kusano 7d535a
}