kusano 7d535a
kusano 7d535a
/*! @file cpanel_bmod.c
kusano 7d535a
 * \brief Performs numeric block updates
kusano 7d535a
 *
kusano 7d535a
 * 
kusano 7d535a
 * -- SuperLU routine (version 3.0) --
kusano 7d535a
 * Univ. of California Berkeley, Xerox Palo Alto Research Center,
kusano 7d535a
 * and Lawrence Berkeley National Lab.
kusano 7d535a
 * October 15, 2003
kusano 7d535a
 *
kusano 7d535a
 * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
kusano 7d535a
 *
kusano 7d535a
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
kusano 7d535a
 * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
kusano 7d535a
 * 
kusano 7d535a
 * Permission is hereby granted to use or copy this program for any
kusano 7d535a
 * purpose, provided the above notices are retained on all copies.
kusano 7d535a
 * Permission to modify the code and to distribute modified code is
kusano 7d535a
 * granted, provided the above notices are retained, and a notice that
kusano 7d535a
 * the code was modified is included with the above copyright notice.
kusano 7d535a
 * 
kusano 7d535a
 */
kusano 7d535a
/*
kusano 7d535a
kusano 7d535a
*/
kusano 7d535a
kusano 7d535a
#include <stdio.h></stdio.h>
kusano 7d535a
#include <stdlib.h></stdlib.h>
kusano 7d535a
#include "slu_cdefs.h"
kusano 7d535a
kusano 7d535a
/* 
kusano 7d535a
 * Function prototypes 
kusano 7d535a
 */
kusano 7d535a
void clsolve(int, int, complex *, complex *);
kusano 7d535a
void cmatvec(int, int, int, complex *, complex *, complex *);
kusano 7d535a
extern void ccheck_tempv();
kusano 7d535a
kusano 7d535a
/*! \brief
kusano 7d535a
 *
kusano 7d535a
 * 
kusano 7d535a
 * Purpose
kusano 7d535a
 * =======
kusano 7d535a
 *
kusano 7d535a
 *    Performs numeric block updates (sup-panel) in topological order.
kusano 7d535a
 *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
kusano 7d535a
 *    Special processing on the supernodal portion of L\U[*,j]
kusano 7d535a
 *
kusano 7d535a
 *    Before entering this routine, the original nonzeros in the panel 
kusano 7d535a
 *    were already copied into the spa[m,w].
kusano 7d535a
 *
kusano 7d535a
 *    Updated/Output parameters-
kusano 7d535a
 *    dense[0:m-1,w]: L[*,j:j+w-1] and U[*,j:j+w-1] are returned 
kusano 7d535a
 *    collectively in the m-by-w vector dense[*]. 
kusano 7d535a
 * 
kusano 7d535a
 */
kusano 7d535a
kusano 7d535a
void
kusano 7d535a
cpanel_bmod (
kusano 7d535a
	    const int  m,          /* in - number of rows in the matrix */
kusano 7d535a
	    const int  w,          /* in */
kusano 7d535a
	    const int  jcol,       /* in */
kusano 7d535a
	    const int  nseg,       /* in */
kusano 7d535a
	    complex     *dense,     /* out, of size n by w */
kusano 7d535a
	    complex     *tempv,     /* working array */
kusano 7d535a
	    int        *segrep,    /* in */
kusano 7d535a
	    int        *repfnz,    /* in, of size n by w */
kusano 7d535a
	    GlobalLU_t *Glu,       /* modified */
kusano 7d535a
	    SuperLUStat_t *stat    /* output */
kusano 7d535a
	    )
kusano 7d535a
{
kusano 7d535a
kusano 7d535a
kusano 7d535a
#ifdef USE_VENDOR_BLAS
kusano 7d535a
#ifdef _CRAY
kusano 7d535a
    _fcd ftcs1 = _cptofcd("L", strlen("L")),
kusano 7d535a
         ftcs2 = _cptofcd("N", strlen("N")),
kusano 7d535a
         ftcs3 = _cptofcd("U", strlen("U"));
kusano 7d535a
#endif
kusano 7d535a
    int          incx = 1, incy = 1;
kusano 7d535a
    complex       alpha, beta;
kusano 7d535a
#endif
kusano 7d535a
kusano 7d535a
    register int k, ksub;
kusano 7d535a
    int          fsupc, nsupc, nsupr, nrow;
kusano 7d535a
    int          krep, krep_ind;
kusano 7d535a
    complex       ukj, ukj1, ukj2;
kusano 7d535a
    int          luptr, luptr1, luptr2;
kusano 7d535a
    int          segsze;
kusano 7d535a
    int          block_nrow;  /* no of rows in a block row */
kusano 7d535a
    register int lptr;	      /* Points to the row subscripts of a supernode */
kusano 7d535a
    int          kfnz, irow, no_zeros; 
kusano 7d535a
    register int isub, isub1, i;
kusano 7d535a
    register int jj;	      /* Index through each column in the panel */
kusano 7d535a
    int          *xsup, *supno;
kusano 7d535a
    int          *lsub, *xlsub;
kusano 7d535a
    complex       *lusup;
kusano 7d535a
    int          *xlusup;
kusano 7d535a
    int          *repfnz_col; /* repfnz[] for a column in the panel */
kusano 7d535a
    complex       *dense_col;  /* dense[] for a column in the panel */
kusano 7d535a
    complex       *tempv1;             /* Used in 1-D update */
kusano 7d535a
    complex       *TriTmp, *MatvecTmp; /* used in 2-D update */
kusano 7d535a
    complex      zero = {0.0, 0.0};
kusano 7d535a
    complex      one = {1.0, 0.0};
kusano 7d535a
    complex      comp_temp, comp_temp1;
kusano 7d535a
    register int ldaTmp;
kusano 7d535a
    register int r_ind, r_hi;
kusano 7d535a
    static   int first = 1, maxsuper, rowblk, colblk;
kusano 7d535a
    flops_t  *ops = stat->ops;
kusano 7d535a
    
kusano 7d535a
    xsup    = Glu->xsup;
kusano 7d535a
    supno   = Glu->supno;
kusano 7d535a
    lsub    = Glu->lsub;
kusano 7d535a
    xlsub   = Glu->xlsub;
kusano 7d535a
    lusup   = Glu->lusup;
kusano 7d535a
    xlusup  = Glu->xlusup;
kusano 7d535a
    
kusano 7d535a
    if ( first ) {
kusano 7d535a
	maxsuper = SUPERLU_MAX( sp_ienv(3), sp_ienv(7) );
kusano 7d535a
	rowblk   = sp_ienv(4);
kusano 7d535a
	colblk   = sp_ienv(5);
kusano 7d535a
	first = 0;
kusano 7d535a
    }
kusano 7d535a
    ldaTmp = maxsuper + rowblk;
kusano 7d535a
kusano 7d535a
    /* 
kusano 7d535a
     * For each nonz supernode segment of U[*,j] in topological order 
kusano 7d535a
     */
kusano 7d535a
    k = nseg - 1;
kusano 7d535a
    for (ksub = 0; ksub < nseg; ksub++) { /* for each updating supernode */
kusano 7d535a
kusano 7d535a
	/* krep = representative of current k-th supernode
kusano 7d535a
	 * fsupc = first supernodal column
kusano 7d535a
	 * nsupc = no of columns in a supernode
kusano 7d535a
	 * nsupr = no of rows in a supernode
kusano 7d535a
	 */
kusano 7d535a
        krep = segrep[k--];
kusano 7d535a
	fsupc = xsup[supno[krep]];
kusano 7d535a
	nsupc = krep - fsupc + 1;
kusano 7d535a
	nsupr = xlsub[fsupc+1] - xlsub[fsupc];
kusano 7d535a
	nrow = nsupr - nsupc;
kusano 7d535a
	lptr = xlsub[fsupc];
kusano 7d535a
	krep_ind = lptr + nsupc - 1;
kusano 7d535a
kusano 7d535a
	repfnz_col = repfnz;
kusano 7d535a
	dense_col = dense;
kusano 7d535a
	
kusano 7d535a
	if ( nsupc >= colblk && nrow > rowblk ) { /* 2-D block update */
kusano 7d535a
kusano 7d535a
	    TriTmp = tempv;
kusano 7d535a
	
kusano 7d535a
	    /* Sequence through each column in panel -- triangular solves */
kusano 7d535a
	    for (jj = jcol; jj < jcol + w; jj++,
kusano 7d535a
		 repfnz_col += m, dense_col += m, TriTmp += ldaTmp ) {
kusano 7d535a
kusano 7d535a
		kfnz = repfnz_col[krep];
kusano 7d535a
		if ( kfnz == EMPTY ) continue;	/* Skip any zero segment */
kusano 7d535a
	    
kusano 7d535a
		segsze = krep - kfnz + 1;
kusano 7d535a
		luptr = xlusup[fsupc];
kusano 7d535a
kusano 7d535a
		ops[TRSV] += 4 * segsze * (segsze - 1);
kusano 7d535a
		ops[GEMV] += 8 * nrow * segsze;
kusano 7d535a
	
kusano 7d535a
		/* Case 1: Update U-segment of size 1 -- col-col update */
kusano 7d535a
		if ( segsze == 1 ) {
kusano 7d535a
		    ukj = dense_col[lsub[krep_ind]];
kusano 7d535a
		    luptr += nsupr*(nsupc-1) + nsupc;
kusano 7d535a
kusano 7d535a
		    for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
kusano 7d535a
			irow = lsub[i];
kusano 7d535a
	    	        cc_mult(&comp_temp, &ukj, &lusup[luptr]);
kusano 7d535a
		        c_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
kusano 7d535a
			++luptr;
kusano 7d535a
		    }
kusano 7d535a
kusano 7d535a
		} else if ( segsze <= 3 ) {
kusano 7d535a
		    ukj = dense_col[lsub[krep_ind]];
kusano 7d535a
		    ukj1 = dense_col[lsub[krep_ind - 1]];
kusano 7d535a
		    luptr += nsupr*(nsupc-1) + nsupc-1;
kusano 7d535a
		    luptr1 = luptr - nsupr;
kusano 7d535a
kusano 7d535a
		    if ( segsze == 2 ) {
kusano 7d535a
		        cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
kusano 7d535a
		        c_sub(&ukj, &ukj, &comp_temp);
kusano 7d535a
			dense_col[lsub[krep_ind]] = ukj;
kusano 7d535a
			for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
kusano 7d535a
			    irow = lsub[i];
kusano 7d535a
			    luptr++; luptr1++;
kusano 7d535a
			    cc_mult(&comp_temp, &ukj, &lusup[luptr]);
kusano 7d535a
			    cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
kusano 7d535a
			    c_add(&comp_temp, &comp_temp, &comp_temp1);
kusano 7d535a
			    c_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
kusano 7d535a
			}
kusano 7d535a
		    } else {
kusano 7d535a
			ukj2 = dense_col[lsub[krep_ind - 2]];
kusano 7d535a
			luptr2 = luptr1 - nsupr;
kusano 7d535a
  		        cc_mult(&comp_temp, &ukj2, &lusup[luptr2-1]);
kusano 7d535a
		        c_sub(&ukj1, &ukj1, &comp_temp);
kusano 7d535a
kusano 7d535a
		        cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
kusano 7d535a
        		cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
kusano 7d535a
		        c_add(&comp_temp, &comp_temp, &comp_temp1);
kusano 7d535a
		        c_sub(&ukj, &ukj, &comp_temp);
kusano 7d535a
			dense_col[lsub[krep_ind]] = ukj;
kusano 7d535a
			dense_col[lsub[krep_ind-1]] = ukj1;
kusano 7d535a
			for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
kusano 7d535a
			    irow = lsub[i];
kusano 7d535a
			    luptr++; luptr1++; luptr2++;
kusano 7d535a
			    cc_mult(&comp_temp, &ukj, &lusup[luptr]);
kusano 7d535a
			    cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
kusano 7d535a
			    c_add(&comp_temp, &comp_temp, &comp_temp1);
kusano 7d535a
			    cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
kusano 7d535a
			    c_add(&comp_temp, &comp_temp, &comp_temp1);
kusano 7d535a
			    c_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
kusano 7d535a
			}
kusano 7d535a
		    }
kusano 7d535a
kusano 7d535a
		} else  {	/* segsze >= 4 */
kusano 7d535a
		    
kusano 7d535a
		    /* Copy U[*,j] segment from dense[*] to TriTmp[*], which
kusano 7d535a
		       holds the result of triangular solves.    */
kusano 7d535a
		    no_zeros = kfnz - fsupc;
kusano 7d535a
		    isub = lptr + no_zeros;
kusano 7d535a
		    for (i = 0; i < segsze; ++i) {
kusano 7d535a
			irow = lsub[isub];
kusano 7d535a
			TriTmp[i] = dense_col[irow]; /* Gather */
kusano 7d535a
			++isub;
kusano 7d535a
		    }
kusano 7d535a
		    
kusano 7d535a
		    /* start effective triangle */
kusano 7d535a
		    luptr += nsupr * no_zeros + no_zeros;
kusano 7d535a
kusano 7d535a
#ifdef USE_VENDOR_BLAS
kusano 7d535a
#ifdef _CRAY
kusano 7d535a
		    CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
kusano 7d535a
			   &nsupr, TriTmp, &incx );
kusano 7d535a
#else
kusano 7d535a
		    ctrsv_( "L", "N", "U", &segsze, &lusup[luptr], 
kusano 7d535a
			   &nsupr, TriTmp, &incx );
kusano 7d535a
#endif
kusano 7d535a
#else		
kusano 7d535a
		    clsolve ( nsupr, segsze, &lusup[luptr], TriTmp );
kusano 7d535a
#endif
kusano 7d535a
		    
kusano 7d535a
kusano 7d535a
		} /* else ... */
kusano 7d535a
	    
kusano 7d535a
	    }  /* for jj ... end tri-solves */
kusano 7d535a
kusano 7d535a
	    /* Block row updates; push all the way into dense[*] block */
kusano 7d535a
	    for ( r_ind = 0; r_ind < nrow; r_ind += rowblk ) {
kusano 7d535a
		
kusano 7d535a
		r_hi = SUPERLU_MIN(nrow, r_ind + rowblk);
kusano 7d535a
		block_nrow = SUPERLU_MIN(rowblk, r_hi - r_ind);
kusano 7d535a
		luptr = xlusup[fsupc] + nsupc + r_ind;
kusano 7d535a
		isub1 = lptr + nsupc + r_ind;
kusano 7d535a
		
kusano 7d535a
		repfnz_col = repfnz;
kusano 7d535a
		TriTmp = tempv;
kusano 7d535a
		dense_col = dense;
kusano 7d535a
		
kusano 7d535a
		/* Sequence through each column in panel -- matrix-vector */
kusano 7d535a
		for (jj = jcol; jj < jcol + w; jj++,
kusano 7d535a
		     repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
kusano 7d535a
		    
kusano 7d535a
		    kfnz = repfnz_col[krep];
kusano 7d535a
		    if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
kusano 7d535a
		    
kusano 7d535a
		    segsze = krep - kfnz + 1;
kusano 7d535a
		    if ( segsze <= 3 ) continue;   /* skip unrolled cases */
kusano 7d535a
		    
kusano 7d535a
		    /* Perform a block update, and scatter the result of
kusano 7d535a
		       matrix-vector to dense[].		 */
kusano 7d535a
		    no_zeros = kfnz - fsupc;
kusano 7d535a
		    luptr1 = luptr + nsupr * no_zeros;
kusano 7d535a
		    MatvecTmp = &TriTmp[maxsuper];
kusano 7d535a
		    
kusano 7d535a
#ifdef USE_VENDOR_BLAS
kusano 7d535a
		    alpha = one; 
kusano 7d535a
                    beta = zero;
kusano 7d535a
#ifdef _CRAY
kusano 7d535a
		    CGEMV(ftcs2, &block_nrow, &segsze, &alpha, &lusup[luptr1], 
kusano 7d535a
			   &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
kusano 7d535a
#else
kusano 7d535a
		    cgemv_("N", &block_nrow, &segsze, &alpha, &lusup[luptr1], 
kusano 7d535a
			   &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
kusano 7d535a
#endif
kusano 7d535a
#else
kusano 7d535a
		    cmatvec(nsupr, block_nrow, segsze, &lusup[luptr1],
kusano 7d535a
			   TriTmp, MatvecTmp);
kusano 7d535a
#endif
kusano 7d535a
		    
kusano 7d535a
		    /* Scatter MatvecTmp[*] into SPA dense[*] temporarily
kusano 7d535a
		     * such that MatvecTmp[*] can be re-used for the
kusano 7d535a
		     * the next blok row update. dense[] will be copied into 
kusano 7d535a
		     * global store after the whole panel has been finished.
kusano 7d535a
		     */
kusano 7d535a
		    isub = isub1;
kusano 7d535a
		    for (i = 0; i < block_nrow; i++) {
kusano 7d535a
			irow = lsub[isub];
kusano 7d535a
		        c_sub(&dense_col[irow], &dense_col[irow], 
kusano 7d535a
                              &MatvecTmp[i]);
kusano 7d535a
			MatvecTmp[i] = zero;
kusano 7d535a
			++isub;
kusano 7d535a
		    }
kusano 7d535a
		    
kusano 7d535a
		} /* for jj ... */
kusano 7d535a
		
kusano 7d535a
	    } /* for each block row ... */
kusano 7d535a
	    
kusano 7d535a
	    /* Scatter the triangular solves into SPA dense[*] */
kusano 7d535a
	    repfnz_col = repfnz;
kusano 7d535a
	    TriTmp = tempv;
kusano 7d535a
	    dense_col = dense;
kusano 7d535a
	    
kusano 7d535a
	    for (jj = jcol; jj < jcol + w; jj++,
kusano 7d535a
		 repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
kusano 7d535a
		kfnz = repfnz_col[krep];
kusano 7d535a
		if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
kusano 7d535a
		
kusano 7d535a
		segsze = krep - kfnz + 1;
kusano 7d535a
		if ( segsze <= 3 ) continue; /* skip unrolled cases */
kusano 7d535a
		
kusano 7d535a
		no_zeros = kfnz - fsupc;		
kusano 7d535a
		isub = lptr + no_zeros;
kusano 7d535a
		for (i = 0; i < segsze; i++) {
kusano 7d535a
		    irow = lsub[isub];
kusano 7d535a
		    dense_col[irow] = TriTmp[i];
kusano 7d535a
		    TriTmp[i] = zero;
kusano 7d535a
		    ++isub;
kusano 7d535a
		}
kusano 7d535a
		
kusano 7d535a
	    } /* for jj ... */
kusano 7d535a
	    
kusano 7d535a
	} else { /* 1-D block modification */
kusano 7d535a
	    
kusano 7d535a
	    
kusano 7d535a
	    /* Sequence through each column in the panel */
kusano 7d535a
	    for (jj = jcol; jj < jcol + w; jj++,
kusano 7d535a
		 repfnz_col += m, dense_col += m) {
kusano 7d535a
		
kusano 7d535a
		kfnz = repfnz_col[krep];
kusano 7d535a
		if ( kfnz == EMPTY ) continue;	/* Skip any zero segment */
kusano 7d535a
		
kusano 7d535a
		segsze = krep - kfnz + 1;
kusano 7d535a
		luptr = xlusup[fsupc];
kusano 7d535a
kusano 7d535a
		ops[TRSV] += 4 * segsze * (segsze - 1);
kusano 7d535a
		ops[GEMV] += 8 * nrow * segsze;
kusano 7d535a
		
kusano 7d535a
		/* Case 1: Update U-segment of size 1 -- col-col update */
kusano 7d535a
		if ( segsze == 1 ) {
kusano 7d535a
		    ukj = dense_col[lsub[krep_ind]];
kusano 7d535a
		    luptr += nsupr*(nsupc-1) + nsupc;
kusano 7d535a
kusano 7d535a
		    for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
kusano 7d535a
			irow = lsub[i];
kusano 7d535a
	    	        cc_mult(&comp_temp, &ukj, &lusup[luptr]);
kusano 7d535a
		        c_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
kusano 7d535a
			++luptr;
kusano 7d535a
		    }
kusano 7d535a
kusano 7d535a
		} else if ( segsze <= 3 ) {
kusano 7d535a
		    ukj = dense_col[lsub[krep_ind]];
kusano 7d535a
		    luptr += nsupr*(nsupc-1) + nsupc-1;
kusano 7d535a
		    ukj1 = dense_col[lsub[krep_ind - 1]];
kusano 7d535a
		    luptr1 = luptr - nsupr;
kusano 7d535a
kusano 7d535a
		    if ( segsze == 2 ) {
kusano 7d535a
		        cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
kusano 7d535a
		        c_sub(&ukj, &ukj, &comp_temp);
kusano 7d535a
			dense_col[lsub[krep_ind]] = ukj;
kusano 7d535a
			for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
kusano 7d535a
			    irow = lsub[i];
kusano 7d535a
			    ++luptr;  ++luptr1;
kusano 7d535a
			    cc_mult(&comp_temp, &ukj, &lusup[luptr]);
kusano 7d535a
			    cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
kusano 7d535a
			    c_add(&comp_temp, &comp_temp, &comp_temp1);
kusano 7d535a
			    c_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
kusano 7d535a
			}
kusano 7d535a
		    } else {
kusano 7d535a
			ukj2 = dense_col[lsub[krep_ind - 2]];
kusano 7d535a
			luptr2 = luptr1 - nsupr;
kusano 7d535a
  		        cc_mult(&comp_temp, &ukj2, &lusup[luptr2-1]);
kusano 7d535a
		        c_sub(&ukj1, &ukj1, &comp_temp);
kusano 7d535a
kusano 7d535a
		        cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
kusano 7d535a
        		cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
kusano 7d535a
		        c_add(&comp_temp, &comp_temp, &comp_temp1);
kusano 7d535a
		        c_sub(&ukj, &ukj, &comp_temp);
kusano 7d535a
			dense_col[lsub[krep_ind]] = ukj;
kusano 7d535a
			dense_col[lsub[krep_ind-1]] = ukj1;
kusano 7d535a
			for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
kusano 7d535a
			    irow = lsub[i];
kusano 7d535a
			    ++luptr; ++luptr1; ++luptr2;
kusano 7d535a
			    cc_mult(&comp_temp, &ukj, &lusup[luptr]);
kusano 7d535a
			    cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
kusano 7d535a
			    c_add(&comp_temp, &comp_temp, &comp_temp1);
kusano 7d535a
			    cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
kusano 7d535a
			    c_add(&comp_temp, &comp_temp, &comp_temp1);
kusano 7d535a
			    c_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
kusano 7d535a
			}
kusano 7d535a
		    }
kusano 7d535a
kusano 7d535a
		} else  { /* segsze >= 4 */
kusano 7d535a
		    /* 
kusano 7d535a
		     * Perform a triangular solve and block update,
kusano 7d535a
		     * then scatter the result of sup-col update to dense[].
kusano 7d535a
		     */
kusano 7d535a
		    no_zeros = kfnz - fsupc;
kusano 7d535a
		    
kusano 7d535a
		    /* Copy U[*,j] segment from dense[*] to tempv[*]: 
kusano 7d535a
		     *    The result of triangular solve is in tempv[*];
kusano 7d535a
		     *    The result of matrix vector update is in dense_col[*]
kusano 7d535a
		     */
kusano 7d535a
		    isub = lptr + no_zeros;
kusano 7d535a
		    for (i = 0; i < segsze; ++i) {
kusano 7d535a
			irow = lsub[isub];
kusano 7d535a
			tempv[i] = dense_col[irow]; /* Gather */
kusano 7d535a
			++isub;
kusano 7d535a
		    }
kusano 7d535a
		    
kusano 7d535a
		    /* start effective triangle */
kusano 7d535a
		    luptr += nsupr * no_zeros + no_zeros;
kusano 7d535a
		    
kusano 7d535a
#ifdef USE_VENDOR_BLAS
kusano 7d535a
#ifdef _CRAY
kusano 7d535a
		    CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
kusano 7d535a
			   &nsupr, tempv, &incx );
kusano 7d535a
#else
kusano 7d535a
		    ctrsv_( "L", "N", "U", &segsze, &lusup[luptr], 
kusano 7d535a
			   &nsupr, tempv, &incx );
kusano 7d535a
#endif
kusano 7d535a
		    
kusano 7d535a
		    luptr += segsze;	/* Dense matrix-vector */
kusano 7d535a
		    tempv1 = &tempv[segsze];
kusano 7d535a
                    alpha = one;
kusano 7d535a
                    beta = zero;
kusano 7d535a
#ifdef _CRAY
kusano 7d535a
		    CGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 
kusano 7d535a
			   &nsupr, tempv, &incx, &beta, tempv1, &incy );
kusano 7d535a
#else
kusano 7d535a
		    cgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 
kusano 7d535a
			   &nsupr, tempv, &incx, &beta, tempv1, &incy );
kusano 7d535a
#endif
kusano 7d535a
#else
kusano 7d535a
		    clsolve ( nsupr, segsze, &lusup[luptr], tempv );
kusano 7d535a
		    
kusano 7d535a
		    luptr += segsze;        /* Dense matrix-vector */
kusano 7d535a
		    tempv1 = &tempv[segsze];
kusano 7d535a
		    cmatvec (nsupr, nrow, segsze, &lusup[luptr], tempv, tempv1);
kusano 7d535a
#endif
kusano 7d535a
		    
kusano 7d535a
		    /* Scatter tempv[*] into SPA dense[*] temporarily, such
kusano 7d535a
		     * that tempv[*] can be used for the triangular solve of
kusano 7d535a
		     * the next column of the panel. They will be copied into 
kusano 7d535a
		     * ucol[*] after the whole panel has been finished.
kusano 7d535a
		     */
kusano 7d535a
		    isub = lptr + no_zeros;
kusano 7d535a
		    for (i = 0; i < segsze; i++) {
kusano 7d535a
			irow = lsub[isub];
kusano 7d535a
			dense_col[irow] = tempv[i];
kusano 7d535a
			tempv[i] = zero;
kusano 7d535a
			isub++;
kusano 7d535a
		    }
kusano 7d535a
		    
kusano 7d535a
		    /* Scatter the update from tempv1[*] into SPA dense[*] */
kusano 7d535a
		    /* Start dense rectangular L */
kusano 7d535a
		    for (i = 0; i < nrow; i++) {
kusano 7d535a
			irow = lsub[isub];
kusano 7d535a
		        c_sub(&dense_col[irow], &dense_col[irow], &tempv1[i]);
kusano 7d535a
			tempv1[i] = zero;
kusano 7d535a
			++isub;	
kusano 7d535a
		    }
kusano 7d535a
		    
kusano 7d535a
		} /* else segsze>=4 ... */
kusano 7d535a
		
kusano 7d535a
	    } /* for each column in the panel... */
kusano 7d535a
	    
kusano 7d535a
	} /* else 1-D update ... */
kusano 7d535a
kusano 7d535a
    } /* for each updating supernode ... */
kusano 7d535a
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
kusano 7d535a