kusano 7d535a
kusano 7d535a
/*! @file smemory.c
kusano 7d535a
 * \brief Memory details
kusano 7d535a
 *
kusano 7d535a
 * 
kusano 7d535a
 * -- SuperLU routine (version 4.0) --
kusano 7d535a
 * Lawrence Berkeley National Laboratory.
kusano 7d535a
 * June 30, 2009
kusano 7d535a
 * 
kusano 7d535a
 */
kusano 7d535a
#include "slu_sdefs.h"
kusano 7d535a
kusano 7d535a
kusano 7d535a
/* Internal prototypes */
kusano 7d535a
void  *sexpand (int *, MemType,int, int, GlobalLU_t *);
kusano 7d535a
int   sLUWorkInit (int, int, int, int **, float **, GlobalLU_t *);
kusano 7d535a
void  copy_mem_float (int, void *, void *);
kusano 7d535a
void  sStackCompress (GlobalLU_t *);
kusano 7d535a
void  sSetupSpace (void *, int, GlobalLU_t *);
kusano 7d535a
void  *suser_malloc (int, int, GlobalLU_t *);
kusano 7d535a
void  suser_free (int, int, GlobalLU_t *);
kusano 7d535a
kusano 7d535a
/* External prototypes (in memory.c - prec-independent) */
kusano 7d535a
extern void    copy_mem_int    (int, void *, void *);
kusano 7d535a
extern void    user_bcopy      (char *, char *, int);
kusano 7d535a
kusano 7d535a
kusano 7d535a
/* Macros to manipulate stack */
kusano 7d535a
#define StackFull(x)         ( x + Glu->stack.used >= Glu->stack.size )
kusano 7d535a
#define NotDoubleAlign(addr) ( (long int)addr & 7 )
kusano 7d535a
#define DoubleAlign(addr)    ( ((long int)addr + 7) & ~7L )
kusano 7d535a
#define TempSpace(m, w)      ( (2*w + 4 + NO_MARKER) * m * sizeof(int) + \
kusano 7d535a
			      (w + 1) * m * sizeof(float) )
kusano 7d535a
#define Reduce(alpha)        ((alpha + 1) / 2)  /* i.e. (alpha-1)/2 + 1 */
kusano 7d535a
kusano 7d535a
kusano 7d535a
kusano 7d535a
kusano 7d535a
/*! \brief Setup the memory model to be used for factorization.
kusano 7d535a
 *  
kusano 7d535a
 *    lwork = 0: use system malloc;
kusano 7d535a
 *    lwork > 0: use user-supplied work[] space.
kusano 7d535a
 */
kusano 7d535a
void sSetupSpace(void *work, int lwork, GlobalLU_t *Glu)
kusano 7d535a
{
kusano 7d535a
    if ( lwork == 0 ) {
kusano 7d535a
	Glu->MemModel = SYSTEM; /* malloc/free */
kusano 7d535a
    } else if ( lwork > 0 ) {
kusano 7d535a
	Glu->MemModel = USER;   /* user provided space */
kusano 7d535a
	Glu->stack.used = 0;
kusano 7d535a
	Glu->stack.top1 = 0;
kusano 7d535a
	Glu->stack.top2 = (lwork/4)*4; /* must be word addressable */
kusano 7d535a
	Glu->stack.size = Glu->stack.top2;
kusano 7d535a
	Glu->stack.array = (void *) work;
kusano 7d535a
    }
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
kusano 7d535a
kusano 7d535a
void *suser_malloc(int bytes, int which_end, GlobalLU_t *Glu)
kusano 7d535a
{
kusano 7d535a
    void *buf;
kusano 7d535a
    
kusano 7d535a
    if ( StackFull(bytes) ) return (NULL);
kusano 7d535a
kusano 7d535a
    if ( which_end == HEAD ) {
kusano 7d535a
	buf = (char*) Glu->stack.array + Glu->stack.top1;
kusano 7d535a
	Glu->stack.top1 += bytes;
kusano 7d535a
    } else {
kusano 7d535a
	Glu->stack.top2 -= bytes;
kusano 7d535a
	buf = (char*) Glu->stack.array + Glu->stack.top2;
kusano 7d535a
    }
kusano 7d535a
    
kusano 7d535a
    Glu->stack.used += bytes;
kusano 7d535a
    return buf;
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
kusano 7d535a
void suser_free(int bytes, int which_end, GlobalLU_t *Glu)
kusano 7d535a
{
kusano 7d535a
    if ( which_end == HEAD ) {
kusano 7d535a
	Glu->stack.top1 -= bytes;
kusano 7d535a
    } else {
kusano 7d535a
	Glu->stack.top2 += bytes;
kusano 7d535a
    }
kusano 7d535a
    Glu->stack.used -= bytes;
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
kusano 7d535a
kusano 7d535a
/*! \brief 
kusano 7d535a
 *
kusano 7d535a
 * 
kusano 7d535a
 * mem_usage consists of the following fields:
kusano 7d535a
 *    - for_lu (float)
kusano 7d535a
 *      The amount of space used in bytes for the L\U data structures.
kusano 7d535a
 *    - total_needed (float)
kusano 7d535a
 *      The amount of space needed in bytes to perform factorization.
kusano 7d535a
 * 
kusano 7d535a
 */
kusano 7d535a
int sQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage)
kusano 7d535a
{
kusano 7d535a
    SCformat *Lstore;
kusano 7d535a
    NCformat *Ustore;
kusano 7d535a
    register int n, iword, dword, panel_size = sp_ienv(1);
kusano 7d535a
kusano 7d535a
    Lstore = L->Store;
kusano 7d535a
    Ustore = U->Store;
kusano 7d535a
    n = L->ncol;
kusano 7d535a
    iword = sizeof(int);
kusano 7d535a
    dword = sizeof(float);
kusano 7d535a
kusano 7d535a
    /* For LU factors */
kusano 7d535a
    mem_usage->for_lu = (float)( (4.0*n + 3.0) * iword +
kusano 7d535a
                                 Lstore->nzval_colptr[n] * dword +
kusano 7d535a
                                 Lstore->rowind_colptr[n] * iword );
kusano 7d535a
    mem_usage->for_lu += (float)( (n + 1.0) * iword +
kusano 7d535a
				 Ustore->colptr[n] * (dword + iword) );
kusano 7d535a
kusano 7d535a
    /* Working storage to support factorization */
kusano 7d535a
    mem_usage->total_needed = mem_usage->for_lu +
kusano 7d535a
	(float)( (2.0 * panel_size + 4.0 + NO_MARKER) * n * iword +
kusano 7d535a
		(panel_size + 1.0) * n * dword );
kusano 7d535a
kusano 7d535a
    return 0;
kusano 7d535a
} /* sQuerySpace */
kusano 7d535a
kusano 7d535a
kusano 7d535a
/*! \brief
kusano 7d535a
 *
kusano 7d535a
 * 
kusano 7d535a
 * mem_usage consists of the following fields:
kusano 7d535a
 *    - for_lu (float)
kusano 7d535a
 *      The amount of space used in bytes for the L\U data structures.
kusano 7d535a
 *    - total_needed (float)
kusano 7d535a
 *      The amount of space needed in bytes to perform factorization.
kusano 7d535a
 * 
kusano 7d535a
 */
kusano 7d535a
int ilu_sQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage)
kusano 7d535a
{
kusano 7d535a
    SCformat *Lstore;
kusano 7d535a
    NCformat *Ustore;
kusano 7d535a
    register int n, panel_size = sp_ienv(1);
kusano 7d535a
    register float iword, dword;
kusano 7d535a
kusano 7d535a
    Lstore = L->Store;
kusano 7d535a
    Ustore = U->Store;
kusano 7d535a
    n = L->ncol;
kusano 7d535a
    iword = sizeof(int);
kusano 7d535a
    dword = sizeof(double);
kusano 7d535a
kusano 7d535a
    /* For LU factors */
kusano 7d535a
    mem_usage->for_lu = (float)( (4.0f * n + 3.0f) * iword +
kusano 7d535a
				 Lstore->nzval_colptr[n] * dword +
kusano 7d535a
				 Lstore->rowind_colptr[n] * iword );
kusano 7d535a
    mem_usage->for_lu += (float)( (n + 1.0f) * iword +
kusano 7d535a
				 Ustore->colptr[n] * (dword + iword) );
kusano 7d535a
kusano 7d535a
    /* Working storage to support factorization.
kusano 7d535a
       ILU needs 5*n more integers than LU */
kusano 7d535a
    mem_usage->total_needed = mem_usage->for_lu +
kusano 7d535a
	(float)( (2.0f * panel_size + 9.0f + NO_MARKER) * n * iword +
kusano 7d535a
		(panel_size + 1.0f) * n * dword );
kusano 7d535a
kusano 7d535a
    return 0;
kusano 7d535a
} /* ilu_sQuerySpace */
kusano 7d535a
kusano 7d535a
kusano 7d535a
/*! \brief Allocate storage for the data structures common to all factor routines.
kusano 7d535a
 *
kusano 7d535a
 * 
kusano 7d535a
 * For those unpredictable size, estimate as fill_ratio * nnz(A).
kusano 7d535a
 * Return value:
kusano 7d535a
 *     If lwork = -1, return the estimated amount of space required, plus n;
kusano 7d535a
 *     otherwise, return the amount of space actually allocated when
kusano 7d535a
 *     memory allocation failure occurred.
kusano 7d535a
 *  
kusano 7d535a
 */
kusano 7d535a
int
kusano 7d535a
sLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
kusano 7d535a
	  int panel_size, float fill_ratio, SuperMatrix *L, SuperMatrix *U,
kusano 7d535a
          GlobalLU_t *Glu, int **iwork, float **dwork)
kusano 7d535a
{
kusano 7d535a
    int      info, iword, dword;
kusano 7d535a
    SCformat *Lstore;
kusano 7d535a
    NCformat *Ustore;
kusano 7d535a
    int      *xsup, *supno;
kusano 7d535a
    int      *lsub, *xlsub;
kusano 7d535a
    float   *lusup;
kusano 7d535a
    int      *xlusup;
kusano 7d535a
    float   *ucol;
kusano 7d535a
    int      *usub, *xusub;
kusano 7d535a
    int      nzlmax, nzumax, nzlumax;
kusano 7d535a
    
kusano 7d535a
    iword     = sizeof(int);
kusano 7d535a
    dword     = sizeof(float);
kusano 7d535a
    Glu->n    = n;
kusano 7d535a
    Glu->num_expansions = 0;
kusano 7d535a
kusano 7d535a
    if ( !Glu->expanders )	
kusano 7d535a
        Glu->expanders = (ExpHeader*)SUPERLU_MALLOC( NO_MEMTYPE *
kusano 7d535a
                                                     sizeof(ExpHeader) );
kusano 7d535a
    if ( !Glu->expanders ) ABORT("SUPERLU_MALLOC fails for expanders");
kusano 7d535a
    
kusano 7d535a
    if ( fact != SamePattern_SameRowPerm ) {
kusano 7d535a
	/* Guess for L\U factors */
kusano 7d535a
	nzumax = nzlumax = fill_ratio * annz;
kusano 7d535a
	nzlmax = SUPERLU_MAX(1, fill_ratio/4.) * annz;
kusano 7d535a
kusano 7d535a
	if ( lwork == -1 ) {
kusano 7d535a
	    return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
kusano 7d535a
		    + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
kusano 7d535a
        } else {
kusano 7d535a
	    sSetupSpace(work, lwork, Glu);
kusano 7d535a
	}
kusano 7d535a
	
kusano 7d535a
#if ( PRNTlevel >= 1 )
kusano 7d535a
	printf("sLUMemInit() called: fill_ratio %.0f, nzlmax %ld, nzumax %ld\n", 
kusano 7d535a
	       fill_ratio, nzlmax, nzumax);
kusano 7d535a
	fflush(stdout);
kusano 7d535a
#endif	
kusano 7d535a
	
kusano 7d535a
	/* Integer pointers for L\U factors */
kusano 7d535a
	if ( Glu->MemModel == SYSTEM ) {
kusano 7d535a
	    xsup   = intMalloc(n+1);
kusano 7d535a
	    supno  = intMalloc(n+1);
kusano 7d535a
	    xlsub  = intMalloc(n+1);
kusano 7d535a
	    xlusup = intMalloc(n+1);
kusano 7d535a
	    xusub  = intMalloc(n+1);
kusano 7d535a
	} else {
kusano 7d535a
	    xsup   = (int *)suser_malloc((n+1) * iword, HEAD, Glu);
kusano 7d535a
	    supno  = (int *)suser_malloc((n+1) * iword, HEAD, Glu);
kusano 7d535a
	    xlsub  = (int *)suser_malloc((n+1) * iword, HEAD, Glu);
kusano 7d535a
	    xlusup = (int *)suser_malloc((n+1) * iword, HEAD, Glu);
kusano 7d535a
	    xusub  = (int *)suser_malloc((n+1) * iword, HEAD, Glu);
kusano 7d535a
	}
kusano 7d535a
kusano 7d535a
	lusup = (float *) sexpand( &nzlumax, LUSUP, 0, 0, Glu );
kusano 7d535a
	ucol  = (float *) sexpand( &nzumax, UCOL, 0, 0, Glu );
kusano 7d535a
	lsub  = (int *)    sexpand( &nzlmax, LSUB, 0, 0, Glu );
kusano 7d535a
	usub  = (int *)    sexpand( &nzumax, USUB, 0, 1, Glu );
kusano 7d535a
kusano 7d535a
	while ( !lusup || !ucol || !lsub || !usub ) {
kusano 7d535a
	    if ( Glu->MemModel == SYSTEM ) {
kusano 7d535a
		SUPERLU_FREE(lusup); 
kusano 7d535a
		SUPERLU_FREE(ucol); 
kusano 7d535a
		SUPERLU_FREE(lsub); 
kusano 7d535a
		SUPERLU_FREE(usub);
kusano 7d535a
	    } else {
kusano 7d535a
		suser_free((nzlumax+nzumax)*dword+(nzlmax+nzumax)*iword,
kusano 7d535a
                            HEAD, Glu);
kusano 7d535a
	    }
kusano 7d535a
	    nzlumax /= 2;
kusano 7d535a
	    nzumax /= 2;
kusano 7d535a
	    nzlmax /= 2;
kusano 7d535a
	    if ( nzlumax < annz ) {
kusano 7d535a
		printf("Not enough memory to perform factorization.\n");
kusano 7d535a
		return (smemory_usage(nzlmax, nzumax, nzlumax, n) + n);
kusano 7d535a
	    }
kusano 7d535a
#if ( PRNTlevel >= 1)
kusano 7d535a
	    printf("sLUMemInit() reduce size: nzlmax %ld, nzumax %ld\n", 
kusano 7d535a
		   nzlmax, nzumax);
kusano 7d535a
	    fflush(stdout);
kusano 7d535a
#endif
kusano 7d535a
	    lusup = (float *) sexpand( &nzlumax, LUSUP, 0, 0, Glu );
kusano 7d535a
	    ucol  = (float *) sexpand( &nzumax, UCOL, 0, 0, Glu );
kusano 7d535a
	    lsub  = (int *)    sexpand( &nzlmax, LSUB, 0, 0, Glu );
kusano 7d535a
	    usub  = (int *)    sexpand( &nzumax, USUB, 0, 1, Glu );
kusano 7d535a
	}
kusano 7d535a
	
kusano 7d535a
    } else {
kusano 7d535a
	/* fact == SamePattern_SameRowPerm */
kusano 7d535a
	Lstore   = L->Store;
kusano 7d535a
	Ustore   = U->Store;
kusano 7d535a
	xsup     = Lstore->sup_to_col;
kusano 7d535a
	supno    = Lstore->col_to_sup;
kusano 7d535a
	xlsub    = Lstore->rowind_colptr;
kusano 7d535a
	xlusup   = Lstore->nzval_colptr;
kusano 7d535a
	xusub    = Ustore->colptr;
kusano 7d535a
	nzlmax   = Glu->nzlmax;    /* max from previous factorization */
kusano 7d535a
	nzumax   = Glu->nzumax;
kusano 7d535a
	nzlumax  = Glu->nzlumax;
kusano 7d535a
	
kusano 7d535a
	if ( lwork == -1 ) {
kusano 7d535a
	    return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
kusano 7d535a
		    + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
kusano 7d535a
        } else if ( lwork == 0 ) {
kusano 7d535a
	    Glu->MemModel = SYSTEM;
kusano 7d535a
	} else {
kusano 7d535a
	    Glu->MemModel = USER;
kusano 7d535a
	    Glu->stack.top2 = (lwork/4)*4; /* must be word-addressable */
kusano 7d535a
	    Glu->stack.size = Glu->stack.top2;
kusano 7d535a
	}
kusano 7d535a
	
kusano 7d535a
	lsub  = Glu->expanders[LSUB].mem  = Lstore->rowind;
kusano 7d535a
	lusup = Glu->expanders[LUSUP].mem = Lstore->nzval;
kusano 7d535a
	usub  = Glu->expanders[USUB].mem  = Ustore->rowind;
kusano 7d535a
	ucol  = Glu->expanders[UCOL].mem  = Ustore->nzval;;
kusano 7d535a
	Glu->expanders[LSUB].size         = nzlmax;
kusano 7d535a
	Glu->expanders[LUSUP].size        = nzlumax;
kusano 7d535a
	Glu->expanders[USUB].size         = nzumax;
kusano 7d535a
	Glu->expanders[UCOL].size         = nzumax;	
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    Glu->xsup    = xsup;
kusano 7d535a
    Glu->supno   = supno;
kusano 7d535a
    Glu->lsub    = lsub;
kusano 7d535a
    Glu->xlsub   = xlsub;
kusano 7d535a
    Glu->lusup   = lusup;
kusano 7d535a
    Glu->xlusup  = xlusup;
kusano 7d535a
    Glu->ucol    = ucol;
kusano 7d535a
    Glu->usub    = usub;
kusano 7d535a
    Glu->xusub   = xusub;
kusano 7d535a
    Glu->nzlmax  = nzlmax;
kusano 7d535a
    Glu->nzumax  = nzumax;
kusano 7d535a
    Glu->nzlumax = nzlumax;
kusano 7d535a
    
kusano 7d535a
    info = sLUWorkInit(m, n, panel_size, iwork, dwork, Glu);
kusano 7d535a
    if ( info )
kusano 7d535a
	return ( info + smemory_usage(nzlmax, nzumax, nzlumax, n) + n);
kusano 7d535a
    
kusano 7d535a
    ++Glu->num_expansions;
kusano 7d535a
    return 0;
kusano 7d535a
    
kusano 7d535a
} /* sLUMemInit */
kusano 7d535a
kusano 7d535a
/*! \brief Allocate known working storage. Returns 0 if success, otherwise
kusano 7d535a
   returns the number of bytes allocated so far when failure occurred. */
kusano 7d535a
int
kusano 7d535a
sLUWorkInit(int m, int n, int panel_size, int **iworkptr, 
kusano 7d535a
            float **dworkptr, GlobalLU_t *Glu)
kusano 7d535a
{
kusano 7d535a
    int    isize, dsize, extra;
kusano 7d535a
    float *old_ptr;
kusano 7d535a
    int    maxsuper = SUPERLU_MAX( sp_ienv(3), sp_ienv(7) ),
kusano 7d535a
           rowblk   = sp_ienv(4);
kusano 7d535a
kusano 7d535a
    isize = ( (2 * panel_size + 3 + NO_MARKER ) * m + n ) * sizeof(int);
kusano 7d535a
    dsize = (m * panel_size +
kusano 7d535a
	     NUM_TEMPV(m,panel_size,maxsuper,rowblk)) * sizeof(float);
kusano 7d535a
    
kusano 7d535a
    if ( Glu->MemModel == SYSTEM ) 
kusano 7d535a
	*iworkptr = (int *) intCalloc(isize/sizeof(int));
kusano 7d535a
    else
kusano 7d535a
	*iworkptr = (int *) suser_malloc(isize, TAIL, Glu);
kusano 7d535a
    if ( ! *iworkptr ) {
kusano 7d535a
	fprintf(stderr, "sLUWorkInit: malloc fails for local iworkptr[]\n");
kusano 7d535a
	return (isize + n);
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    if ( Glu->MemModel == SYSTEM )
kusano 7d535a
	*dworkptr = (float *) SUPERLU_MALLOC(dsize);
kusano 7d535a
    else {
kusano 7d535a
	*dworkptr = (float *) suser_malloc(dsize, TAIL, Glu);
kusano 7d535a
	if ( NotDoubleAlign(*dworkptr) ) {
kusano 7d535a
	    old_ptr = *dworkptr;
kusano 7d535a
	    *dworkptr = (float*) DoubleAlign(*dworkptr);
kusano 7d535a
	    *dworkptr = (float*) ((double*)*dworkptr - 1);
kusano 7d535a
	    extra = (char*)old_ptr - (char*)*dworkptr;
kusano 7d535a
#ifdef DEBUG	    
kusano 7d535a
	    printf("sLUWorkInit: not aligned, extra %d\n", extra);
kusano 7d535a
#endif	    
kusano 7d535a
	    Glu->stack.top2 -= extra;
kusano 7d535a
	    Glu->stack.used += extra;
kusano 7d535a
	}
kusano 7d535a
    }
kusano 7d535a
    if ( ! *dworkptr ) {
kusano 7d535a
	fprintf(stderr, "malloc fails for local dworkptr[].");
kusano 7d535a
	return (isize + dsize + n);
kusano 7d535a
    }
kusano 7d535a
	
kusano 7d535a
    return 0;
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
kusano 7d535a
/*! \brief Set up pointers for real working arrays.
kusano 7d535a
 */
kusano 7d535a
void
kusano 7d535a
sSetRWork(int m, int panel_size, float *dworkptr,
kusano 7d535a
	 float **dense, float **tempv)
kusano 7d535a
{
kusano 7d535a
    float zero = 0.0;
kusano 7d535a
kusano 7d535a
    int maxsuper = SUPERLU_MAX( sp_ienv(3), sp_ienv(7) ),
kusano 7d535a
        rowblk   = sp_ienv(4);
kusano 7d535a
    *dense = dworkptr;
kusano 7d535a
    *tempv = *dense + panel_size*m;
kusano 7d535a
    sfill (*dense, m * panel_size, zero);
kusano 7d535a
    sfill (*tempv, NUM_TEMPV(m,panel_size,maxsuper,rowblk), zero);     
kusano 7d535a
}
kusano 7d535a
	
kusano 7d535a
/*! \brief Free the working storage used by factor routines.
kusano 7d535a
 */
kusano 7d535a
void sLUWorkFree(int *iwork, float *dwork, GlobalLU_t *Glu)
kusano 7d535a
{
kusano 7d535a
    if ( Glu->MemModel == SYSTEM ) {
kusano 7d535a
	SUPERLU_FREE (iwork);
kusano 7d535a
	SUPERLU_FREE (dwork);
kusano 7d535a
    } else {
kusano 7d535a
	Glu->stack.used -= (Glu->stack.size - Glu->stack.top2);
kusano 7d535a
	Glu->stack.top2 = Glu->stack.size;
kusano 7d535a
/*	sStackCompress(Glu);  */
kusano 7d535a
    }
kusano 7d535a
    
kusano 7d535a
    SUPERLU_FREE (Glu->expanders);	
kusano 7d535a
    Glu->expanders = NULL;
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
/*! \brief Expand the data structures for L and U during the factorization.
kusano 7d535a
 * 
kusano 7d535a
 * 
kusano 7d535a
 * Return value:   0 - successful return
kusano 7d535a
 *               > 0 - number of bytes allocated when run out of space
kusano 7d535a
 * 
kusano 7d535a
 */
kusano 7d535a
int
kusano 7d535a
sLUMemXpand(int jcol,
kusano 7d535a
	   int next,          /* number of elements currently in the factors */
kusano 7d535a
	   MemType mem_type,  /* which type of memory to expand  */
kusano 7d535a
	   int *maxlen,       /* modified - maximum length of a data structure */
kusano 7d535a
	   GlobalLU_t *Glu    /* modified - global LU data structures */
kusano 7d535a
	   )
kusano 7d535a
{
kusano 7d535a
    void   *new_mem;
kusano 7d535a
    
kusano 7d535a
#ifdef DEBUG    
kusano 7d535a
    printf("sLUMemXpand(): jcol %d, next %d, maxlen %d, MemType %d\n",
kusano 7d535a
	   jcol, next, *maxlen, mem_type);
kusano 7d535a
#endif    
kusano 7d535a
kusano 7d535a
    if (mem_type == USUB) 
kusano 7d535a
    	new_mem = sexpand(maxlen, mem_type, next, 1, Glu);
kusano 7d535a
    else
kusano 7d535a
	new_mem = sexpand(maxlen, mem_type, next, 0, Glu);
kusano 7d535a
    
kusano 7d535a
    if ( !new_mem ) {
kusano 7d535a
	int    nzlmax  = Glu->nzlmax;
kusano 7d535a
	int    nzumax  = Glu->nzumax;
kusano 7d535a
	int    nzlumax = Glu->nzlumax;
kusano 7d535a
    	fprintf(stderr, "Can't expand MemType %d: jcol %d\n", mem_type, jcol);
kusano 7d535a
    	return (smemory_usage(nzlmax, nzumax, nzlumax, Glu->n) + Glu->n);
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    switch ( mem_type ) {
kusano 7d535a
      case LUSUP:
kusano 7d535a
	Glu->lusup   = (float *) new_mem;
kusano 7d535a
	Glu->nzlumax = *maxlen;
kusano 7d535a
	break;
kusano 7d535a
      case UCOL:
kusano 7d535a
	Glu->ucol   = (float *) new_mem;
kusano 7d535a
	Glu->nzumax = *maxlen;
kusano 7d535a
	break;
kusano 7d535a
      case LSUB:
kusano 7d535a
	Glu->lsub   = (int *) new_mem;
kusano 7d535a
	Glu->nzlmax = *maxlen;
kusano 7d535a
	break;
kusano 7d535a
      case USUB:
kusano 7d535a
	Glu->usub   = (int *) new_mem;
kusano 7d535a
	Glu->nzumax = *maxlen;
kusano 7d535a
	break;
kusano 7d535a
    }
kusano 7d535a
    
kusano 7d535a
    return 0;
kusano 7d535a
    
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
kusano 7d535a
kusano 7d535a
void
kusano 7d535a
copy_mem_float(int howmany, void *old, void *new)
kusano 7d535a
{
kusano 7d535a
    register int i;
kusano 7d535a
    float *dold = old;
kusano 7d535a
    float *dnew = new;
kusano 7d535a
    for (i = 0; i < howmany; i++) dnew[i] = dold[i];
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
/*! \brief Expand the existing storage to accommodate more fill-ins.
kusano 7d535a
 */
kusano 7d535a
void
kusano 7d535a
*sexpand (
kusano 7d535a
	 int *prev_len,   /* length used from previous call */
kusano 7d535a
	 MemType type,    /* which part of the memory to expand */
kusano 7d535a
	 int len_to_copy, /* size of the memory to be copied to new store */
kusano 7d535a
	 int keep_prev,   /* = 1: use prev_len;
kusano 7d535a
			     = 0: compute new_len to expand */
kusano 7d535a
	 GlobalLU_t *Glu  /* modified - global LU data structures */
kusano 7d535a
	)
kusano 7d535a
{
kusano 7d535a
    float    EXPAND = 1.5;
kusano 7d535a
    float    alpha;
kusano 7d535a
    void     *new_mem, *old_mem;
kusano 7d535a
    int      new_len, tries, lword, extra, bytes_to_copy;
kusano 7d535a
    ExpHeader *expanders = Glu->expanders; /* Array of 4 types of memory */
kusano 7d535a
kusano 7d535a
    alpha = EXPAND;
kusano 7d535a
kusano 7d535a
    if ( Glu->num_expansions == 0 || keep_prev ) {
kusano 7d535a
        /* First time allocate requested */
kusano 7d535a
        new_len = *prev_len;
kusano 7d535a
    } else {
kusano 7d535a
	new_len = alpha * *prev_len;
kusano 7d535a
    }
kusano 7d535a
    
kusano 7d535a
    if ( type == LSUB || type == USUB ) lword = sizeof(int);
kusano 7d535a
    else lword = sizeof(float);
kusano 7d535a
kusano 7d535a
    if ( Glu->MemModel == SYSTEM ) {
kusano 7d535a
	new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
kusano 7d535a
	if ( Glu->num_expansions != 0 ) {
kusano 7d535a
	    tries = 0;
kusano 7d535a
	    if ( keep_prev ) {
kusano 7d535a
		if ( !new_mem ) return (NULL);
kusano 7d535a
	    } else {
kusano 7d535a
		while ( !new_mem ) {
kusano 7d535a
		    if ( ++tries > 10 ) return (NULL);
kusano 7d535a
		    alpha = Reduce(alpha);
kusano 7d535a
		    new_len = alpha * *prev_len;
kusano 7d535a
		    new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
kusano 7d535a
		}
kusano 7d535a
	    }
kusano 7d535a
	    if ( type == LSUB || type == USUB ) {
kusano 7d535a
		copy_mem_int(len_to_copy, expanders[type].mem, new_mem);
kusano 7d535a
	    } else {
kusano 7d535a
		copy_mem_float(len_to_copy, expanders[type].mem, new_mem);
kusano 7d535a
	    }
kusano 7d535a
	    SUPERLU_FREE (expanders[type].mem);
kusano 7d535a
	}
kusano 7d535a
	expanders[type].mem = (void *) new_mem;
kusano 7d535a
	
kusano 7d535a
    } else { /* MemModel == USER */
kusano 7d535a
	if ( Glu->num_expansions == 0 ) {
kusano 7d535a
	    new_mem = suser_malloc(new_len * lword, HEAD, Glu);
kusano 7d535a
	    if ( NotDoubleAlign(new_mem) &&
kusano 7d535a
		(type == LUSUP || type == UCOL) ) {
kusano 7d535a
		old_mem = new_mem;
kusano 7d535a
		new_mem = (void *)DoubleAlign(new_mem);
kusano 7d535a
		extra = (char*)new_mem - (char*)old_mem;
kusano 7d535a
#ifdef DEBUG		
kusano 7d535a
		printf("expand(): not aligned, extra %d\n", extra);
kusano 7d535a
#endif		
kusano 7d535a
		Glu->stack.top1 += extra;
kusano 7d535a
		Glu->stack.used += extra;
kusano 7d535a
	    }
kusano 7d535a
	    expanders[type].mem = (void *) new_mem;
kusano 7d535a
	} else {
kusano 7d535a
	    tries = 0;
kusano 7d535a
	    extra = (new_len - *prev_len) * lword;
kusano 7d535a
	    if ( keep_prev ) {
kusano 7d535a
		if ( StackFull(extra) ) return (NULL);
kusano 7d535a
	    } else {
kusano 7d535a
		while ( StackFull(extra) ) {
kusano 7d535a
		    if ( ++tries > 10 ) return (NULL);
kusano 7d535a
		    alpha = Reduce(alpha);
kusano 7d535a
		    new_len = alpha * *prev_len;
kusano 7d535a
		    extra = (new_len - *prev_len) * lword;	    
kusano 7d535a
		}
kusano 7d535a
	    }
kusano 7d535a
kusano 7d535a
	    if ( type != USUB ) {
kusano 7d535a
		new_mem = (void*)((char*)expanders[type + 1].mem + extra);
kusano 7d535a
		bytes_to_copy = (char*)Glu->stack.array + Glu->stack.top1
kusano 7d535a
		    - (char*)expanders[type + 1].mem;
kusano 7d535a
		user_bcopy(expanders[type+1].mem, new_mem, bytes_to_copy);
kusano 7d535a
kusano 7d535a
		if ( type < USUB ) {
kusano 7d535a
		    Glu->usub = expanders[USUB].mem =
kusano 7d535a
			(void*)((char*)expanders[USUB].mem + extra);
kusano 7d535a
		}
kusano 7d535a
		if ( type < LSUB ) {
kusano 7d535a
		    Glu->lsub = expanders[LSUB].mem =
kusano 7d535a
			(void*)((char*)expanders[LSUB].mem + extra);
kusano 7d535a
		}
kusano 7d535a
		if ( type < UCOL ) {
kusano 7d535a
		    Glu->ucol = expanders[UCOL].mem =
kusano 7d535a
			(void*)((char*)expanders[UCOL].mem + extra);
kusano 7d535a
		}
kusano 7d535a
		Glu->stack.top1 += extra;
kusano 7d535a
		Glu->stack.used += extra;
kusano 7d535a
		if ( type == UCOL ) {
kusano 7d535a
		    Glu->stack.top1 += extra;   /* Add same amount for USUB */
kusano 7d535a
		    Glu->stack.used += extra;
kusano 7d535a
		}
kusano 7d535a
		
kusano 7d535a
	    } /* if ... */
kusano 7d535a
kusano 7d535a
	} /* else ... */
kusano 7d535a
    }
kusano 7d535a
kusano 7d535a
    expanders[type].size = new_len;
kusano 7d535a
    *prev_len = new_len;
kusano 7d535a
    if ( Glu->num_expansions ) ++Glu->num_expansions;
kusano 7d535a
    
kusano 7d535a
    return (void *) expanders[type].mem;
kusano 7d535a
    
kusano 7d535a
} /* sexpand */
kusano 7d535a
kusano 7d535a
kusano 7d535a
/*! \brief Compress the work[] array to remove fragmentation.
kusano 7d535a
 */
kusano 7d535a
void
kusano 7d535a
sStackCompress(GlobalLU_t *Glu)
kusano 7d535a
{
kusano 7d535a
    register int iword, dword, ndim;
kusano 7d535a
    char    *last, *fragment;
kusano 7d535a
    int      *ifrom, *ito;
kusano 7d535a
    float   *dfrom, *dto;
kusano 7d535a
    int      *xlsub, *lsub, *xusub, *usub, *xlusup;
kusano 7d535a
    float   *ucol, *lusup;
kusano 7d535a
    
kusano 7d535a
    iword = sizeof(int);
kusano 7d535a
    dword = sizeof(float);
kusano 7d535a
    ndim = Glu->n;
kusano 7d535a
kusano 7d535a
    xlsub  = Glu->xlsub;
kusano 7d535a
    lsub   = Glu->lsub;
kusano 7d535a
    xusub  = Glu->xusub;
kusano 7d535a
    usub   = Glu->usub;
kusano 7d535a
    xlusup = Glu->xlusup;
kusano 7d535a
    ucol   = Glu->ucol;
kusano 7d535a
    lusup  = Glu->lusup;
kusano 7d535a
    
kusano 7d535a
    dfrom = ucol;
kusano 7d535a
    dto = (float *)((char*)lusup + xlusup[ndim] * dword);
kusano 7d535a
    copy_mem_float(xusub[ndim], dfrom, dto);
kusano 7d535a
    ucol = dto;
kusano 7d535a
kusano 7d535a
    ifrom = lsub;
kusano 7d535a
    ito = (int *) ((char*)ucol + xusub[ndim] * iword);
kusano 7d535a
    copy_mem_int(xlsub[ndim], ifrom, ito);
kusano 7d535a
    lsub = ito;
kusano 7d535a
    
kusano 7d535a
    ifrom = usub;
kusano 7d535a
    ito = (int *) ((char*)lsub + xlsub[ndim] * iword);
kusano 7d535a
    copy_mem_int(xusub[ndim], ifrom, ito);
kusano 7d535a
    usub = ito;
kusano 7d535a
    
kusano 7d535a
    last = (char*)usub + xusub[ndim] * iword;
kusano 7d535a
    fragment = (char*) (((char*)Glu->stack.array + Glu->stack.top1) - last);
kusano 7d535a
    Glu->stack.used -= (long int) fragment;
kusano 7d535a
    Glu->stack.top1 -= (long int) fragment;
kusano 7d535a
kusano 7d535a
    Glu->ucol = ucol;
kusano 7d535a
    Glu->lsub = lsub;
kusano 7d535a
    Glu->usub = usub;
kusano 7d535a
    
kusano 7d535a
#ifdef DEBUG
kusano 7d535a
    printf("sStackCompress: fragment %d\n", fragment);
kusano 7d535a
    /* for (last = 0; last < ndim; ++last)
kusano 7d535a
	print_lu_col("After compress:", last, 0);*/
kusano 7d535a
#endif    
kusano 7d535a
    
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
/*! \brief Allocate storage for original matrix A
kusano 7d535a
 */
kusano 7d535a
void
kusano 7d535a
sallocateA(int n, int nnz, float **a, int **asub, int **xa)
kusano 7d535a
{
kusano 7d535a
    *a    = (float *) floatMalloc(nnz);
kusano 7d535a
    *asub = (int *) intMalloc(nnz);
kusano 7d535a
    *xa   = (int *) intMalloc(n+1);
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
kusano 7d535a
float *floatMalloc(int n)
kusano 7d535a
{
kusano 7d535a
    float *buf;
kusano 7d535a
    buf = (float *) SUPERLU_MALLOC((size_t)n * sizeof(float)); 
kusano 7d535a
    if ( !buf ) {
kusano 7d535a
	ABORT("SUPERLU_MALLOC failed for buf in floatMalloc()\n");
kusano 7d535a
    }
kusano 7d535a
    return (buf);
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
float *floatCalloc(int n)
kusano 7d535a
{
kusano 7d535a
    float *buf;
kusano 7d535a
    register int i;
kusano 7d535a
    float zero = 0.0;
kusano 7d535a
    buf = (float *) SUPERLU_MALLOC((size_t)n * sizeof(float));
kusano 7d535a
    if ( !buf ) {
kusano 7d535a
	ABORT("SUPERLU_MALLOC failed for buf in floatCalloc()\n");
kusano 7d535a
    }
kusano 7d535a
    for (i = 0; i < n; ++i) buf[i] = zero;
kusano 7d535a
    return (buf);
kusano 7d535a
}
kusano 7d535a
kusano 7d535a
kusano 7d535a
int smemory_usage(const int nzlmax, const int nzumax, 
kusano 7d535a
		  const int nzlumax, const int n)
kusano 7d535a
{
kusano 7d535a
    register int iword, dword;
kusano 7d535a
kusano 7d535a
    iword   = sizeof(int);
kusano 7d535a
    dword   = sizeof(float);
kusano 7d535a
    
kusano 7d535a
    return (10 * n * iword +
kusano 7d535a
	    nzlmax * iword + nzumax * (iword + dword) + nzlumax * dword);
kusano 7d535a
kusano 7d535a
}